From d80723ddb2fa97c0156b7eedfa744edc70a556e5 Mon Sep 17 00:00:00 2001 From: Peng Huo Date: Thu, 22 Jan 2026 15:32:49 -0800 Subject: [PATCH 1/6] pr_collector init Signed-off-by: Peng Huo --- .../resources/sql/2025-06-01-TO-2026-01-21.md | 72859 ++++++++++++++++ .../resources/sql/2025-12-01-TO-2025-12-31.md | 4940 ++ .kiro/scripts/pr_collector/README.md | 88 + .kiro/scripts/pr_collector/pr_collector.py | 301 + .kiro/steering/pr-collector.md | 137 + 5 files changed, 78325 insertions(+) create mode 100644 .kiro/resources/sql/2025-06-01-TO-2026-01-21.md create mode 100644 .kiro/resources/sql/2025-12-01-TO-2025-12-31.md create mode 100644 .kiro/scripts/pr_collector/README.md create mode 100755 .kiro/scripts/pr_collector/pr_collector.py create mode 100644 .kiro/steering/pr-collector.md diff --git a/.kiro/resources/sql/2025-06-01-TO-2026-01-21.md b/.kiro/resources/sql/2025-06-01-TO-2026-01-21.md new file mode 100644 index 0000000000..ef451482fe --- /dev/null +++ b/.kiro/resources/sql/2025-06-01-TO-2026-01-21.md @@ -0,0 +1,72859 @@ +# PR Review Data: opensearch-project/sql +**Date Range:** 2025-06-01 to 2026-01-21 +**Total PRs:** 784 +**Generated:** 2026-01-22T10:36:47.869994 + +--- + +# PR #5062: [Backport 2.19-dev] Add micro benchmarks for unified query layer + +**URL:** https://github.com/opensearch-project/sql/pull/5062 + +**Author:** @dai-chen + +**Created:** 2026-01-21T23:19:31Z + +**State:** MERGED + +**Merged:** 2026-01-22T05:42:45Z + +**Changes:** +242 -2 (5 files) + +**Assignees:** @dai-chen + + +## Description + +Backport cee7f6bc7 from #5043. + +### Notes +- Manual backport required because automated backport failed due to conflicts. +- Conflicts resolved in benchmarks/build.gradle. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5057: Set `max=1` in join as default when `plugins.ppl.syntax.legacy.preferred=false` + +**URL:** https://github.com/opensearch-project/sql/pull/5057 + +**Author:** @LantaoJin + +**Created:** 2026-01-20T10:00:57Z + +**State:** MERGED + +**Merged:** 2026-01-22T05:42:49Z + +**Changes:** +110 -2 (6 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `performance`, `backport 2.19-dev` + + +## Description + +### Description +For performance purposes, set `max=1` in join as the default when `plugins.ppl.syntax.legacy.preferred=false` + +### Related Issues +https://github.com/opensearch-project/sql/issues/5056 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5053: [Backport 2.19] Support profile option for PPL - Part II Implement operator level metrics + +**URL:** https://github.com/opensearch-project/sql/pull/5053 + +**Author:** @penghuo + +**Created:** 2026-01-15T22:55:05Z + +**State:** MERGED + +**Merged:** 2026-01-20T03:13:38Z + +**Changes:** +696 -20 (16 files) + + +## Description + +## Description +Backport of #5044 to `2.19-dev`. + +Note: `OpenSearchQueryRequest` was intentionally left as the upstream `2.19-dev` version for manual merge in this backport. Please resolve that file in a follow-up commit. 
+ +## Changes +- cherry-pick `07629d430` (Support profile option for PPL - Part II Implement operator level metrics) +- resolve conflicts for `CalciteToolsHelper` and `OpenSearchExecutionEngine` +- Java 11 lambda capture fix in `CalciteToolsHelper` + +## Testing +- `./gradlew compileJava` + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5051: [Backport 2.19-dev] Push down filters on nested fields as nested queries + +**URL:** https://github.com/opensearch-project/sql/pull/5051 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2026-01-15T06:27:18Z + +**State:** MERGED + +**Merged:** 2026-01-15T08:45:27Z + +**Changes:** +507 -19 (20 files) + + +## Description + +Backport ff82c67d69cb2e9c5df5c5f50cecc2da06b45d95 from #4825. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5050: [Backport 2.19-dev] Remove GetAlias Call + +**URL:** https://github.com/opensearch-project/sql/pull/5050 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2026-01-15T05:48:46Z + +**State:** MERGED + +**Merged:** 2026-01-15T06:18:33Z + +**Changes:** +43 -9 (2 files) + + +## Description + +Backport 74b2fb3dc94d07ac8c22ee9845e6350af9ef45c6 from #4981. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5049: Add OUTPUT as an alias for REPLACE in Lookup + +**URL:** https://github.com/opensearch-project/sql/pull/5049 + +**Author:** @Swiddis + +**Created:** 2026-01-14T21:58:48Z + +**State:** MERGED + +**Merged:** 2026-01-15T19:00:26Z + +**Changes:** +195 -4 (5 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Small syntax update. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `docs/user/ppl/cmd/lookup.md:None` + + +Heh, fun find. Works fine on doc website: +image + +But broken in GH markdown: +image + +Will escape the pipes + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5047: [Backport 2.19-dev] Add unified function interface with function discovery API + +**URL:** https://github.com/opensearch-project/sql/pull/5047 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2026-01-14T18:33:53Z + +**State:** MERGED + +**Merged:** 2026-01-15T04:57:16Z + +**Changes:** +467 -1 (6 files) + + +## Description + +Backport 3f646d5b8cd305c7768d1f524bf76f3d44b1b7d8 from #5039. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5046: [Backport 2.19-dev]Separate explain mode from format params (#5042) + +**URL:** https://github.com/opensearch-project/sql/pull/5046 + +**Author:** @qianheng-aws + +**Created:** 2026-01-14T09:07:04Z + +**State:** MERGED + +**Merged:** 2026-01-15T05:51:38Z + +**Changes:** +523 -152 (41 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/811881bbfb8d046c46fd9afba3f00f489b96d1c2 from https://github.com/opensearch-project/sql/pull/5042. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5044: Support profile option for PPL - Part II Implement operator level metrics + +**URL:** https://github.com/opensearch-project/sql/pull/5044 + +**Author:** @penghuo + +**Created:** 2026-01-13T22:57:17Z + +**State:** MERGED + +**Merged:** 2026-01-15T04:56:27Z + +**Changes:** +693 -27 (17 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +- Added operator-level plan profiling for Calcite execution and exposed it in `profile.plan` with node, `time_ms`, `rows`, and `children`. +- Plan node names now use raw Calcite physical operator names (e.g., `EnumerableCalc`, `CalciteEnumerableIndexScan`). +- Profiling output still includes stage timings (`analyze`, `optimize`, `execute`, `format`) and `summary.total_time_ms`. +- Updated docs and PPL profile YAML test to assert plan output content. + +### Classes +- `PlanProfileBuilder`: builds a profiled RelNode tree and parallel plan-node tree. 
+- `ProfileEnumerableRel`: wraps EnumerableRel codegen; rewrites the return to `ProfileEnumerableRel.profile(...)`. +- `ProfileEnumerator`: measures per-operator wall-clock time and row counts. +- `ProfileScannableRel`: handles `Scannable` plans to keep scan timing in operator scope. +- `ProfilePlanNode` + `ProfilePlanNodeMetrics`: store per-node metrics and emit JSON snapshot. +``` +PlanProfileBuilder + └─ ProfilePlan (rel + planRoot) + ├─ ProfileEnumerableRel (wraps EnumerableRel) + │ └─ ProfileEnumerator (wraps Enumerator) + └─ ProfileScannableRel (wraps Scannable scan) + +ProfilePlanNode + └─ ProfilePlanNodeMetrics (time + rows) + +QueryProfile + └─ PlanNode (JSON output) +``` + +### Example: Join Plan Interpretation + +For a join query like `source=log00001 | join left=l right=r ON l.packets=r.packets log00001`, the plan section is read top-down: +``` +{ + "profile": { + "summary": { + "total_time_ms": 894.72 + }, + "phases": { + "analyze": { + "time_ms": 543.98 + }, + "optimize": { + "time_ms": 257.68 + }, + "execute": { + "time_ms": 91.14 + }, + "format": { + "time_ms": 0.72 + } + }, + "plan": { + "node": "EnumerableLimit", + "time_ms": 81.15, + "rows": 4, + "children": [ + { + "node": "EnumerableMergeJoin", + "time_ms": 80.93, + "rows": 4, + "children": [ + { + "node": "CalciteEnumerableIndexScan", + "time_ms": 74.72, + "rows": 4 + }, + { + "node": "CalciteEnumerableIndexScan", + "time_ms": 4.04, + "rows": 4 + } + ] + } + ] + } + } +``` +- `EnumerableLimit` is the root operator (final limiting of rows); its `time_ms` includes all child work. +- `EnumerableMergeJoin` is the join operator; its `time_ms` includes both scans plus join processing. +- Each `CalciteEnumerableIndexScan` represents one side of the join; two separate scan nodes indicate two index reads. 
Note, CalciteEnumerableIndexScan time includes the [OpenSearch initial scan time](https://github.com/opensearch-project/sql/pull/5044/changes#diff-99698c6402b61b1d75f4082a3186b90959f4165fcf2ea4c4a18eab36df1f73d5R145-R150) +- `rows` on each node is the number of rows produced by that operator. +- execute time_ms (91.14 ms) is larger than plan total time_ms (81.15 ms). The difference is time spent composing the result set, which happens outside the operators. + + +### FAQ +#### Meaning of `time_ms` in Operators +- `time_ms` is inclusive wall-clock time for that operator, covering time spent in its own enumerator plus any time spent waiting on children. + +#### Why Exclude Request-Level `time_ms` +- OpenSearch background scanning can prefetch asynchronously; request-level timing can overemphasize internal fetch mechanics rather than what the operator actually “waits on.” +- The goal is operator-level behavior (what the query experiences), so we measure time at the enumerable operator boundary only. + + +### Related Issues +https://github.com/opensearch-project/sql/pull/4983 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +@dai-chen @LantaoJin @qianheng-aws Please review code. It is a follow-up of https://github.com/opensearch-project/sql/pull/4983. 
+ + +--- + +# PR #5043: Add micro benchmarks for unified query layer + +**URL:** https://github.com/opensearch-project/sql/pull/5043 + +**Author:** @dai-chen + +**Created:** 2026-01-13T18:21:20Z + +**State:** MERGED + +**Merged:** 2026-01-21T19:50:59Z + +**Changes:** +241 -1 (5 files) + +**Labels:** `infrastructure`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +Add JMH microbenchmarks for the unified query layer to establish baseline performance metrics for query planning, transpilation, compilation, and unified function evaluation. + +#### Changes + +- `UnifiedQueryBenchmark`: benchmarks for query planning, SQL transpilation, and compilation pipelines across common PPL query patterns. +- `UnifiedFunctionBenchmark`: benchmarks for function loading and evaluation covering representative functions from different categories (json, math, conditional, collection, string). + +#### Sample Test Results + +``` +Benchmark (query) Mode Cnt Score Error Units +UnifiedQueryBenchmark.compileQuery source = catalog.employees avgt 5 0.475 ± 0.309 ms/op +UnifiedQueryBenchmark.compileQuery source = catalog.employees | where age > 30 avgt 5 1.514 ± 1.226 ms/op +UnifiedQueryBenchmark.compileQuery source = catalog.employees | stats count() by department avgt 5 2.273 ± 4.164 ms/op +UnifiedQueryBenchmark.compileQuery source = catalog.employees | sort - age avgt 5 0.636 ± 0.348 ms/op +UnifiedQueryBenchmark.compileQuery source = catalog.employees | where age > 25 | stats avg(age) by department | sort - department avgt 5 3.121 ± 3.492 ms/op +UnifiedQueryBenchmark.planQuery source = catalog.employees avgt 5 0.124 ± 0.006 ms/op +UnifiedQueryBenchmark.planQuery source = catalog.employees | where age > 30 avgt 5 0.140 ± 0.029 ms/op +UnifiedQueryBenchmark.planQuery source = catalog.employees | stats count() by department avgt 5 0.210 ± 0.028 ms/op +UnifiedQueryBenchmark.planQuery source = catalog.employees | sort 
- age avgt 5 0.149 ± 0.016 ms/op +UnifiedQueryBenchmark.planQuery source = catalog.employees | where age > 25 | stats avg(age) by department | sort - department avgt 5 0.153 ± 0.020 ms/op +UnifiedQueryBenchmark.transpileToSql source = catalog.employees avgt 5 0.134 ± 0.021 ms/op +UnifiedQueryBenchmark.transpileToSql source = catalog.employees | where age > 30 avgt 5 0.143 ± 0.022 ms/op +UnifiedQueryBenchmark.transpileToSql source = catalog.employees | stats count() by department avgt 5 0.254 ± 0.051 ms/op +UnifiedQueryBenchmark.transpileToSql source = catalog.employees | sort - age avgt 5 0.155 ± 0.020 ms/op +UnifiedQueryBenchmark.transpileToSql source = catalog.employees | where age > 25 | stats avg(age) by department | sort - department avgt 5 0.178 ± 0.047 ms/op +``` + +``` +Benchmark (benchmarkSpec) Mode Cnt Score Error Units +UnifiedFunctionBenchmark.evalFunction JSON_EXTRACT avgt 5 0.294 ± 0.040 ms/op +UnifiedFunctionBenchmark.evalFunction COALESCE avgt 5 0.367 ± 0.017 ms/op +UnifiedFunctionBenchmark.evalFunction MVFIND avgt 5 0.292 ± 0.115 ms/op +UnifiedFunctionBenchmark.evalFunction REX_EXTRACT avgt 5 0.341 ± 0.059 ms/op +UnifiedFunctionBenchmark.loadFunction JSON_EXTRACT avgt 5 0.293 ± 0.055 ms/op +UnifiedFunctionBenchmark.loadFunction COALESCE avgt 5 0.422 ± 0.105 ms/op +UnifiedFunctionBenchmark.loadFunction MVFIND avgt 5 0.277 ± 0.059 ms/op +UnifiedFunctionBenchmark.loadFunction REX_EXTRACT avgt 5 0.339 ± 0.052 ms/op +``` + +### Related Issues +Resolves partially https://github.com/opensearch-project/sql/issues/4782 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @dai-chen on `benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java:None` + + +Addressed in https://github.com/opensearch-project/sql/pull/5043/commits/3b0315b28c3ba0a98cd32f2402b02b10c4507329. + + +### @RyanL1997 on `benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedFunctionBenchmark.java:125` + + +question: using array list here is to handle the `null` case for `COALESCE` tesitng? + + +### @dai-chen on `benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedFunctionBenchmark.java:125` + + +yes, `List.of` will throw NPE. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5042: Separate explain mode from format params + +**URL:** https://github.com/opensearch-project/sql/pull/5042 + +**Author:** @qianheng-aws + +**Created:** 2026-01-13T08:48:24Z + +**State:** MERGED + +**Merged:** 2026-01-14T08:37:34Z + +**Changes:** +523 -153 (41 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `v3.5.0` + + +## Description + +### Description +Introduce a new query parameter `mode`(allows `SIMPLE`, `STANDARD`, `EXTENDED`, `COST`) specifically for controlling the explain verbosity, while keeping format strictly for output serialization format(allows `JSON`, `YAML` for explain API or command). + +e.g. +``` +POST .../_plugins/_ppl/_explain?mode=extended&format=yaml +{ +"query": "source=..." +} +``` +Before this PR, when setting `format=yaml`, users cannot specify the explain mode anymore with the explain mode bounded to the default `STANDARD`. + +For backward compatibility, it also supports `format=[simple, standard, extended, cost]` and get the same behavior as before + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4940 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @penghuo on `docs/user/ppl/interfaces/endpoint.md:64` + + ++1, mode is clearer than format. +Q, PPL should support format=[simple, extended, cost] until 4.x, then we can deprecate it, right? If yes, we should add a deprecation warning in the response. + + +### @qianheng-aws on `docs/user/ppl/interfaces/endpoint.md:64` + + +Do you mean we should keep backward compatibility? The current code won't support `format=[simple, extended, cost]` anymore. + + +### @LantaoJin on `docs/user/ppl/interfaces/endpoint.md:64` + + ++1 to add `format=[simple, extended, cost]` as an alias of `mode=[simple, extended, cost]` before 4.x and mark them as deprecated. Even if they are not widely used, as a policy of the open source community, we don't want to introduce API-level breaking changes. + + +### @qianheng-aws on `docs/user/ppl/interfaces/endpoint.md:64` + + +Addressed in https://github.com/opensearch-project/sql/pull/5042/commits/17e9334ed199c74ac248aab27869ed47a4147ea6, `format=[simple, standard, extended, cost]` is supported as well. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5040: Increment version to 3.5.0 + +**URL:** https://github.com/opensearch-project/sql/pull/5040 + +**Author:** @cwperks + +**Created:** 2026-01-12T20:56:30Z + +**State:** MERGED + +**Merged:** 2026-01-12T21:20:44Z + +**Changes:** +5 -5 (4 files) + +**Labels:** `maintenance` + + +## Description + +### Description + +Increment version to 3.5.0 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5039: Add unified function interface with function discovery API + +**URL:** https://github.com/opensearch-project/sql/pull/5039 + +**Author:** @dai-chen + +**Created:** 2026-01-12T17:38:56Z + +**State:** MERGED + +**Merged:** 2026-01-14T18:33:37Z + +**Changes:** +467 -1 (6 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces a **unified function abstraction** that lets PPL functions be represented and evaluated in an engine-agnostic way. A function can be implemented once and reused by external execution engines (e.g., Spark) without duplicating engine-specific logic. + +#### Design Highlights + +1. **Type representation**: types represented as SQL type name strings (e.g., "VARCHAR", "INTEGER") to avoid premature type abstraction, e.g., `UnifiedType`. +2. **Value representation**: function inputs/outputs use plain Java values to keep the API minimal, deferring a richer record model (e.g., `UnifiedRecord`) until required. +3. **Simplified serialization**: functions are pre-compiled to source code strings to avoid `RexNode` serialization complexity. We will improve this later if any security or performance concern arises. +4. **Execution model**: `eval()` currently executes via the pre-compiled code string internally. In the future, we can introduce an explicit `codegen()` API on unified function interface to cleanly separate interpreted vs compiled execution, and to integrate with engine-native codegen pipelines (e.g., Spark WholeStageCodegen). 
+ +#### Key Components + +- `UnifiedFunction`: Serializable interface representing a function with name, input/output types, and evaluation capability. +- `UnifiedFunctionCalciteAdapter`: Calcite-based implementation that adapts existing function implementation of `RexNode`. +- `UnifiedFunctionRepository`: Discovers and loads PPL functions as function descriptors with lazy instantiation via builders. + +``` +┌──────────────────────────────────────────────┐ +│ «interface» │ +│ UnifiedFunction │ +│ «Serializable» │ +├──────────────────────────────────────────────┤ +│ + getFunctionName(): String │ +│ + getInputTypes(): List │ +│ + getReturnType(): String │ +│ + eval(inputs: List): Object │ +└──────────────────────────────────────────────┘ + △ + │ implements + │ +┌──────────────────────────────────────────────┐ +│ UnifiedFunctionCalciteAdapter │ +├──────────────────────────────────────────────┤ +│ - functionName: String │ +│ - returnType: String │ +│ - inputTypes: List │ +│ - compiledCode: String │ +├──────────────────────────────────────────────┤ +│ + create(rexBuilder, functionName, │ +│ inputTypeNames): Adapter │ +│ + eval(inputs): Object │ +└──────────────────────────────────────────────┘ + △ + │ creates + │ +┌──────────────────────────────────────────────┐ +│ UnifiedFunctionRepository │ +├──────────────────────────────────────────────┤ +│ - context: UnifiedQueryContext │ +├──────────────────────────────────────────────┤ +│ + loadFunctions(): │ +│ List │ +│ + loadFunction(name): │ +│ Optional │ +├──────────────────────────────────────────────┤ +│ «inner» UnifiedFunctionDescriptor │ +│ - functionName: String │ +│ - builder: UnifiedFunctionBuilder │ +├──────────────────────────────────────────────┤ +│ «inner» UnifiedFunctionBuilder │ +│ + build(inputTypes): UnifiedFunction │ +└──────────────────────────────────────────────┘ +``` + +#### Future Work + +In the future, we can make unified functions fully independent of Calcite by moving function implementations and 
discovery off Calcite APIs (in both the unified function implementations and repository). At that point, we can also introduce richer abstractions—such as a unified type and record model—by referencing LinkedIn’s Transport framework https://github.com/linkedin/transport. + +### Related Issues +Resolves partially https://github.com/opensearch-project/sql/issues/4782, https://github.com/opensearch-project/opensearch-spark/issues/1281 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `api/src/main/java/org/opensearch/sql/api/function/UnifiedFunctionRepository.java:51` + + +How does overloaded function resolve works? + + +### @dai-chen on `api/src/main/java/org/opensearch/sql/api/function/UnifiedFunctionRepository.java:51` + + +It's similar to our V2/Spark's function builder which returns actual function implementation based on the given function argument type(s). + + +### @dai-chen on `api/src/main/java/org/opensearch/sql/api/function/UnifiedFunctionRepository.java:78` + + +Addressed in https://github.com/opensearch-project/sql/pull/5039/commits/3488e4dd782d4055a880d1e8f5bd99e487adc0da. + + +### @dai-chen on `api/src/main/java/org/opensearch/sql/api/function/UnifiedFunctionRepository.java:40` + + +Addressed in https://github.com/opensearch-project/sql/pull/5039/commits/3488e4dd782d4055a880d1e8f5bd99e487adc0da. + + +### @dai-chen on `api/README.md:121` + + +Addressed in https://github.com/opensearch-project/sql/pull/5039/commits/3488e4dd782d4055a880d1e8f5bd99e487adc0da. 
+ + +### @dai-chen on `api/README.md:None` + + +Addressed in https://github.com/opensearch-project/sql/pull/5039/commits/3488e4dd782d4055a880d1e8f5bd99e487adc0da. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5037: [Backport 2.19-dev]Introduce logical dedup operators for PPL (#5014) + +**URL:** https://github.com/opensearch-project/sql/pull/5037 + +**Author:** @qianheng-aws + +**Created:** 2026-01-12T02:47:30Z + +**State:** MERGED + +**Merged:** 2026-01-12T05:23:14Z + +**Changes:** +915 -414 (124 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/22669da3f6e141e594e80915de9ad64c2ca1a054 from https://github.com/opensearch-project/sql/pull/5014. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5036: [Backport 2.19-dev] Fix PIT context leak in Legacy SQL for non-paginated queries + +**URL:** https://github.com/opensearch-project/sql/pull/5036 + +**Author:** @aalva500-prog + +**Created:** 2026-01-10T03:45:10Z + +**State:** MERGED + +**Merged:** 2026-01-13T16:36:03Z + +**Changes:** +391 -84 (4 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/5bf322ff8b006c94524fdfb3c748ae72c2ac4232 from https://github.com/opensearch-project/sql/pull/5009 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5035: [Backport 3.1] Fix PIT context leak in Legacy SQL for non-paginated queries + +**URL:** https://github.com/opensearch-project/sql/pull/5035 + +**Author:** @aalva500-prog + +**Created:** 2026-01-10T03:02:01Z + +**State:** MERGED + +**Merged:** 2026-01-13T16:35:56Z + +**Changes:** +475 -72 (9 files) + + +## Description + +Backport 5bf322f from 
https://github.com/opensearch-project/sql/pull/5009 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5033: Add index.md for PPL functions documentation + +**URL:** https://github.com/opensearch-project/sql/pull/5033 + +**Author:** @ritvibhatt + +**Created:** 2026-01-09T22:10:17Z + +**State:** MERGED + +**Merged:** 2026-01-15T22:54:20Z + +**Changes:** +238 -0 (1 files) + +**Labels:** `documentation`, `PPL` + + +## Description + +### Description +Add index.md under PPL functions directory to make it clear what functions are supported with PPL +- Lists all categories of functions and all functions in each category + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5032: [Backport 2.19-dev] Support profile options for PPL - Part I Implement phases level metri… + +**URL:** https://github.com/opensearch-project/sql/pull/5032 + +**Author:** @penghuo + +**Created:** 2026-01-09T17:27:02Z + +**State:** MERGED + +**Merged:** 2026-01-12T02:28:51Z + +**Changes:** +669 -25 (23 files) + + +## Description + +(cherry picked from commit 47709a05afa0419fccd4c37e6ec5cc8066dd8292) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5030: [Backport 2.19-dev] Add Frequently Used Big5 PPL Queries + +**URL:** https://github.com/opensearch-project/sql/pull/5030 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2026-01-08T02:44:44Z + +**State:** MERGED + +**Merged:** 2026-01-08T05:24:35Z + +**Changes:** +157 -2 (7 files) + + +## Description + +Backport b66dc12b21ce3f7311fc4412604192b497e1133a from #4976. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5028: Implement spath command with field resolution + +**URL:** https://github.com/opensearch-project/sql/pull/5028 + +**Author:** @ykmr1224 + +**Created:** 2026-01-07T23:49:29Z + +**State:** MERGED + +**Merged:** 2026-01-15T23:43:27Z + +**Changes:** +2991 -168 (39 files) + +**Labels:** `PPL`, `feature`, `backport-failed`, `calcite`, `backport 2.19-dev` + +**Assignees:** @ykmr1224 + + +## Description + +### Description +- Implement spath command with field resolution +- This change is for step1 (spath limited version) in https://github.com/opensearch-project/sql/issues/4984 +- Identify referred fields from query and extract only the referred fields from JSON. + - Raise error when referred fields cannot be identified (like when no `fields` command) or wildcard is used. +- All the fields are extracted as STRING regardless of the actual value type in JSON + - Later it would be extended to ANY, but that requires functions to handle ANY well. +- Originally planned to use `mvappend` function, but added new internal `append` function to avoid returning single element array. (`mvappend` will always make array regardless of array size, this is consistent with other array functions in PPL. `append` will return scalar value when the result become single element, this is similar to Splunk behavior where single element array is automatically converted to scalar value.) + - This prevents fields not merged by `spath` command becoming an array. + +### Related Issues +https://github.com/opensearch-project/sql/issues/4984 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/spath.md:39` + + +Are these limitations will be addressed in following PR? + + +### @penghuo on `docs/user/ppl/cmd/spath.md:18` + + +`Field Resolution-based` is a solution, not user facing feature? Or we want to tell user spath will auto extract fields reference in the query. + + +### @penghuo on `docs/user/ppl/cmd/spath.md:142` + + +Add a test case to demo field merge capability. +`{"a.b", 1, "a":{"b":2}}`, results will be `"a.b": [1, 2]` + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java:225` + + +error message will return to user? it is not actionable. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java:224` + + +This function not use CalcitePlanContext info. Could fit into analyzer module. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java:112` + + +Consider extracting per‑command “required field” logic to reducing duplication when new commands are added. + + +### @ykmr1224 on `docs/user/ppl/cmd/spath.md:39` + + +Yes, do we want to note those will be addressed in near future? (I suppose those would be in later version) + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java:225` + + +It would be 5xx error as it throws IllegalStateException. + + +### @ykmr1224 on `docs/user/ppl/cmd/spath.md:18` + + +Yes, it is actually more like a internal solution to realize `spath` command. +As it is interim solution until the dynamic fields are added in the next step, I'd like to keep it as it is. 
+Once dynamic fields are added, we don't need to mention about `field resolution-based extraction`. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java:112` + + +Per‑command “required field” logic depends on the command logic, which specific part do you expect to be extracted? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java:224` + + +This function refers `rootNode` in CalcitePlanContext, and that's why I put it here. + + + +### @ykmr1224 on `docs/user/ppl/cmd/spath.md:142` + + +Added example. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5026: [DOC] Add legacy ppl/index.rst for backwards compatibility + +**URL:** https://github.com/opensearch-project/sql/pull/5026 + +**Author:** @kylehounslow + +**Created:** 2026-01-07T22:05:37Z + +**State:** MERGED + +**Merged:** 2026-01-08T00:44:13Z + +**Changes:** +2 -0 (1 files) + +**Labels:** `documentation` + + +## Description + +### Description +There are broken links to legacy `docs/user/ppl/index.rst` file in the [AWS docs](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/observability-analyze-logs.html) and older [blog posts](https://opensearch.org/blog/better-observability-deeper-insights-opensearchs-new-piped-processing-language-capabilities). This PR adds a reference `docs/user/ppl/index.rst` file with link to index.md file to support backwards compatibility. + +### Testing + +See example rendered file here: [docs/user/ppl/index.rst](https://github.com/kylehounslow/sql/blob/1627a541e21b4a948d268f411d3e1a57a98c5d9f/docs/user/ppl/index.rst) + +### Check List +- [N/A] New functionality includes testing. +- [N/A] New functionality has been documented. + - [N/A] New functionality has javadoc added. + - [N/A] New functionality has a user manual doc added. 
+- [N/A] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) all confirmed. +- [N/A] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [N/A] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `docs/user/ppl/index.rst:2` + + +^ doesn't matter for github rst rendering, & i don't think portability is a concern + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5024: [Backport 2.19-dev] Add unified query compiler API + +**URL:** https://github.com/opensearch-project/sql/pull/5024 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2026-01-07T18:01:19Z + +**State:** MERGED + +**Merged:** 2026-01-08T02:42:46Z + +**Changes:** +554 -42 (10 files) + + +## Description + +Backport 46302b7e05243cc0dc1bcc139e2da0f4f75d95f4 from #4974. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5023: [Backport 2.19-dev] Support read multi-values from OpenSearch if no codegen triggered (#5015) + +**URL:** https://github.com/opensearch-project/sql/pull/5023 + +**Author:** @LantaoJin + +**Created:** 2026-01-07T07:09:25Z + +**State:** MERGED + +**Merged:** 2026-01-07T09:34:20Z + +**Changes:** +115 -190 (6 files) + + +## Description + +(cherry picked from #5015 commit 98516d6e17ad2281f173a7bb0f2d5d1d9232b1ee) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5022: [Backport 2.19-dev] ML command supports category_field parameter (#3909) + +**URL:** https://github.com/opensearch-project/sql/pull/5022 + +**Author:** @LantaoJin + +**Created:** 2026-01-07T07:04:12Z + +**State:** MERGED + +**Merged:** 2026-01-07T09:34:12Z + +**Changes:** +60 -17 (2 files) + + +## Description + +(cherry picked from #3909 commit 661cb8d3d536d6a04fa020d1d53731aae31af558) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5020: Clean up maven workflows for 2.19-dev branch + +**URL:** https://github.com/opensearch-project/sql/pull/5020 + +**Author:** @ahkcs + +**Created:** 2026-01-06T22:27:09Z + +**State:** MERGED + +**Merged:** 2026-01-06T23:06:28Z + +**Changes:** +1 -138 (3 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Clean up maven snapshot upload workflows for 2.19-dev branch + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments 
from humans_ + + +--- + +# PR #5019: Add version numbers for all settings in the docs + +**URL:** https://github.com/opensearch-project/sql/pull/5019 + +**Author:** @Swiddis + +**Created:** 2026-01-06T22:02:34Z + +**State:** MERGED + +**Merged:** 2026-01-14T22:16:21Z + +**Changes:** +148 -19 (2 files) + +**Labels:** `skip-changelog` + + +## Description + +### Description +While debugging an oncall ticket, having the version number specified for [plugins.query.buckets](https://github.com/opensearch-project/sql/blob/b66dc12b21ce3f7311fc4412604192b497e1133a/docs/user/ppl/admin/settings.md?plain=1#L168-L172) was super useful. I was so inspired I wanted to do it with all the rest of our settings, too. + +### Related Issues +Minor inconveniences with finding the version in which a config became available. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +So you manually checked all commits to identify the introduce versions, did you? I am not sure should reviewer check them again. I approve this changes without checking the specific version values. + + +### @RyanL1997 - APPROVED + + +Thanks for the change, and I was wondering if there is also a similar way to handle the deprecation of a config? 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5018: [Backport 2.19-dev] Backport util file for maven snapshot upload + +**URL:** https://github.com/opensearch-project/sql/pull/5018 + +**Author:** @ahkcs + +**Created:** 2026-01-06T19:02:33Z + +**State:** MERGED + +**Merged:** 2026-01-06T21:57:10Z + +**Changes:** +14 -0 (1 files) + + +## Description + +### Description +Backport util file for maven snapshot upload workflow from main to 2.19-dev branch + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5017: [Backport 2.19-dev] [BugFix] Not between should use range query + +**URL:** https://github.com/opensearch-project/sql/pull/5017 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2026-01-06T08:21:47Z + +**State:** MERGED + +**Merged:** 2026-01-07T08:29:15Z + +**Changes:** +34 -9 (4 files) + + +## Description + +Backport b39d803a21f50ec16b46bcdb1b4db9fc05ae041e from #5016. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5016: [BugFix] Not between should use range query + +**URL:** https://github.com/opensearch-project/sql/pull/5016 + +**Author:** @qianheng-aws + +**Created:** 2026-01-06T07:15:38Z + +**State:** MERGED + +**Merged:** 2026-01-06T08:21:32Z + +**Changes:** +31 -8 (3 files) + +**Labels:** `pushdown`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Not between should use range query instead of script query. It's caused by Range::lowerBoundType/upperBoundType will throw NPE if its upper/lower Bound is null, therefore it falls back to script push down. 
+ +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4903 +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5015: Support read multi-values from OpenSearch if no codegen triggered + +**URL:** https://github.com/opensearch-project/sql/pull/5015 + +**Author:** @LantaoJin + +**Created:** 2026-01-06T06:49:24Z + +**State:** MERGED + +**Merged:** 2026-01-07T05:41:22Z + +**Changes:** +115 -189 (6 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +PPL with Calcite enabling cannot handle the fields with multi-values. The issue was described in https://github.com/opensearch-project/sql/issues/4173. + +https://github.com/opensearch-project/sql/pull/4909 (closed) provided a mitigation that handle those array columns by picking the **first** value instead of failure as a short-term solution. + +This PR provides another solution to align with the behavior of v2 **if a PPL query is fully pushdown (no codegen triggered)**. It could resolve the issue of PPL failures in Discover UI. + +### Related Issues +Resolves #4173 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @qianheng-aws - COMMENTED + + +[question] What if there is codegen for the final plan? + + +## Review Comments + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:None` + + +`resultSet.getObject(columnName)` works. reflection is not necessary? + + +## General Comments + + +### @LantaoJin + + +> [question] What if there is codegen for the final plan? + +Updated the description. This solution only resolve the failures when there is no codegen triggered. + + +--- + +# PR #5014: Introduce logical dedup operators for PPL + +**URL:** https://github.com/opensearch-project/sql/pull/5014 + +**Author:** @qianheng-aws + +**Created:** 2026-01-05T08:55:32Z + +**State:** MERGED + +**Merged:** 2026-01-09T06:09:20Z + +**Changes:** +911 -414 (124 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Introduce logical dedup operators for PPL, this PR made this by: +- Add rule to simplify `Project-FIlter-ProjectWindow-Filter` to `LogicalDedup` +- Simplify `DedupPushDownRule` by matching `Dedup-Project-Scan` pattern +- Add `PPLDedupConvertRule` to convert a `LogicalDedup` operator to an equivalent operator composition(project -> filter -> project(OVER)) + +Additional change: +- Move `mergeAdjacedFilter` to the place before prepareStatement. Add the new added `PPLSimplifyDedupRule` in the HepPlanner as well. +- Fix a bug in Project push down rule, it will produce `$fx` to replace the original correct field name. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/5013 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - DISMISSED + + +LGTM + + +### @LantaoJin - DISMISSED + + +Please resolve the AI reviews. + + +## Review Comments + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +`dedup 2 age, account_number` is the query previous to this test case... + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchRelFieldTrimmer.java:32` + + +can you add javadoc for this class and its public methods + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java:1016` + + +can `_row_number_join_max_dedup_` be retired, if we don't distinguish them + + +### @qianheng-aws on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java:1016` + + +yes + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java:88` + + +The _row_number must be in the last position of the project. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java:108` + + +Copied from the `DedupPushDownRule`, remove it since we have predicate in the rule match. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java:120` + + +keepEmpty=true requires `Or(isNull, ..., LessThan)` condition in filter, has already check tha. + + +## General Comments + + +### @penghuo + + +2 Questions, + +- Can the newly introduced operator be translated into SQL? +- What alternative options were considered? Is the limitation due to the DSL lacking sufficient expressiveness to represent the required relational operators? 
If so, we could consider extending the DSL or implementing a dedicated execution layer. + +Add comments in https://github.com/opensearch-project/sql/issues/5005 also + + +### @qianheng-aws + + +``` +OPTION1 +1. visit DEDUP ast +2. translate to DEDUP operator +3.1 convert to composite operators pattern + 3.1.1 convert to Substrait + 3.1.2 convert to Enumerable operator + 3.1.3 convert to SqlNode and do validation[pending] + 4. simplify to DEDUP operator and then back to 3.x +3.2 push down to top_hits DSL(Pruning and skip 3.1) + +OPTION2 +1. visit DEDUP ast +2. translate to composite operators pattern +3.1 simplify to DEDUP operator before optimizing + 4.1 push down to top_hits DSL(Pruning and skip 4.2) + 4.2 convert to composite operators pattern + 5. convert to Enumerable operator +3.2 convert to Substrait +3.3 convert to SqlNode and do validation, then back to 3.1[pending] +``` +@ltjin @yuanchu @penghuo Comparing to OPTION2, the step of simplify to DEDUP in OPTION1 is not necessary for now before introducing SqlNode validation. For translating to Substrait plan, the rule of convert to composite operators pattern is already there and can be reused. + + +### @qianheng-aws + + +> 2 Questions, +> +> * Can the newly introduced operator be translated into SQL? +> * What alternative options were considered? Is the limitation due to the DSL lacking sufficient expressiveness to represent the required relational operators? If so, we could consider extending the DSL or implementing a dedicated execution layer. +> +> Add comments in #5005 also + +1. ~~Yes, but should call `convertCustomizedRelNode` before translating~~ The latest change in https://github.com/opensearch-project/sql/pull/5014/commits/c04f08f751d8ac06cc55d058b7535b60c461aec3 revert the plan back, it doesn't require `convertCustomizedRelNode` anymore. +2. The limitation comes from non-atomic expression on operators for `DEDUP`, while we need to push that down. 
Therefore, our push down rule for it is very complex and introduce more load on the optimizer. +dedup command(atomic) -> logical operators(non-atomic before this PR) -> top_hits DSL(atomic) + + +--- + +# PR #5012: [Backport 2.19-dev] Support nested aggregation when calcite enabled (#4979) + +**URL:** https://github.com/opensearch-project/sql/pull/5012 + +**Author:** @LantaoJin + +**Created:** 2026-01-05T05:47:13Z + +**State:** MERGED + +**Merged:** 2026-01-05T08:47:18Z + +**Changes:** +1182 -211 (43 files) + + +## Description + +(cherry picked from #4979 commit 77633ef589d839c57c4c00fdc8a86d70a02d74d8) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5009: Fix PIT context leak in Legacy SQL for non-paginated queries + +**URL:** https://github.com/opensearch-project/sql/pull/5009 + +**Author:** @aalva500-prog + +**Created:** 2025-12-31T23:19:12Z + +**State:** MERGED + +**Merged:** 2026-01-08T22:59:29Z + +**Changes:** +387 -50 (4 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `backport 3.1`, `bugFix` + + +## Description + +### Description +This PR fixes Point-in-Time (PIT) context leak in the Legacy SQL engine when executing queries without `fetch_size` parameter. + +**Problem:** +The Legacy SQL engine was creating PIT contexts for ALL queries but only cleaning them up when cursors were created (paginated queries with `fetch_size > 0`). Non-paginated queries leaked PITs, causing accumulation until the 300 PIT limit was exhausted and queries to fail. 
+ +**Solution:** +- Only create PIT when `fetch_size > 0` and not `null` (pagination requested) + +**Impact:** +- Non-paginated queries no longer leak PIT contexts +- Paginated queries continue to work correctly with cursor-based PIT management + +### Related Issues +Resolves #5002 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the fix! + + +## Review Comments + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:100` + + +Wrong suggestion, there is no compilation error as the executeCursorCloseQuery method is not missing - it's already implemented in the parent class SQLIntegTestCase and is being correctly inherited by PointInTimeLeakIT. + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Will take care of it + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Will take care of it + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +issue (non-blocking): Why do we need this catch block? The finally block does the same thing? + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +issue (readability): It's not obvious how this logic avoids deleting the cursor for paginated queries. + +`cursorCreated` could stand to be more clearly named, and all the cursor config could be split out to a smaller method to make it easier to follow this branching. 
`isDefaultCursor` isn't a very descriptive name either + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +I'll remove the catch block and leave the finally block, thanks! + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Sure, will simplify the implementation and make it more readable, thanks! + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:None` + + +Please remove all sysout + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Could you separate the logic in 2 methods instead of flow control by this flag? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:99` + + +What is this used for? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Not necessary because finally block will executed anyway? + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Yeah, I'm fixing it, thanks! + + +### @vamsimanohar on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:102` + + +can you explain the logic here in words? What does fetchsize mean here? + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Sure, I'm working on it, thanks! + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:102` + + +fetchsize is the size of the fetch_size parameter of the query, I'm only creating a PIT if the user specifies the fetch_size parameter and it is greater than zero. 
Before this, the PIT was always created regardless the user providing fetchsize in the query or not. My logic is to avoid creating PIT if `fetchSize` is 0 or not specified, meaning no pagination is needed and no PIT is created. Hope that makes sense. + + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:99` + + +It was originally in the code, will take a further look and update here, thanks! + +__Update:__ Ran the integration tests without this line and got failures in `CursorIT`, `PrettyFormatResponseIT`, `SQLCorrectnessIT`, `PaginationFallbackIT`, and `PaginationIT`. + +According to my investigation, the call to `queryAction.explain()` is necessary because it builds the OpenSearch `SearchRequestBuilder` before executing the query. Without it, the `request` field in `DefaultQueryAction` remains null, which causes failures when `queryAction.getRequestBuilder()` is called later. + +The original code was: + +```java +SqlOpenSearchRequestBuilder sqlOpenSearchRequestBuilder = queryAction.explain(); +``` + +Since the returned variable is never used, I simplified it to: + +```java +queryAction.explain(); +``` + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:None` + + +np: I recall org.json supports JSON path. Could you check `query()` or `optQuery()` API? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Thanks for the refactor. It looks much cleaner! + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:None` + + +Sure, I'll modify this method accordingly, thanks! 
+ + +## General Comments + + +### @vamsimanohar + + +Have you also checked in other scenarios if the PIT contexts are closed properly + + +### @aalva500-prog + + +> Have you also checked in other scenarios if the PIT contexts are closed properly + +@vamsimanohar The PIT context leak only happens in Legacy engine when the index name has special characters like `test-logs-2025.01.01`, it is not wrapped in backticks, and the fecth_size is not provided. It doesn't happen when the index name is like `test-logs-2025-01-01`, as it never falls to Legacy engine and it is handled by V2/V3. The IT class `PointInTimeLeakIT` should cover such scenarios, specifically this test case `testCompareV1AndV2EnginePitBehavior` compares V1 vs V2 by using backticks around the index so that the query is handle by V2 only. Note: I did manual testing in my own local cluster and AOS domain to confirm the same. + + +--- + +# PR #5007: [Backport 2.19-dev] adding capability in SQLQueryUtils to identify if SQL query is for creating a table or not + +**URL:** https://github.com/opensearch-project/sql/pull/5007 + +**Author:** @Parasjg + +**Created:** 2025-12-30T07:45:44Z + +**State:** MERGED + +**Merged:** 2026-01-07T02:21:32Z + +**Changes:** +100 -7 (2 files) + + +## Description + +BackPorting Already Merged PR (https://github.com/opensearch-project/sql/pull/4029) - https://github.com/opensearch-project/sql/commit/5cb51814ed1bb527c47a5e827c773a1899c081e9 +from main to 2.19-dev +### Description +Adding capability in SQLQueryUtils to identify if SQL query is for creating a table or not. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + +### Check List +- [NA] New functionality has been documented. + - [NA] New functionality has javadoc added. + - [NA] New functionality has a user manual doc added. +- [NA] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) all confirmed. 
+- [NA] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [NA] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5003: [Backport 2.19-dev] Support enumerable TopK + +**URL:** https://github.com/opensearch-project/sql/pull/5003 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-29T07:08:42Z + +**State:** MERGED + +**Merged:** 2025-12-29T09:32:07Z + +**Changes:** +334 -217 (39 files) + + +## Description + +Backport 08be6f92fecdf6ea14cbcb38762677e2dcfcf85d from #4993. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5000: [Backport 2.19-dev] Prune old in operator push down rules (#4992) + +**URL:** https://github.com/opensearch-project/sql/pull/5000 + +**Author:** @qianheng-aws + +**Created:** 2025-12-26T09:41:39Z + +**State:** MERGED + +**Merged:** 2025-12-29T06:24:06Z + +**Changes:** +471 -385 (110 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4992 commit https://github.com/opensearch-project/sql/commit/0ecb0a95b252401474d78fb45abafa30cb31453d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4997: Apply feedback from documentation-website to PPL command docs + +**URL:** https://github.com/opensearch-project/sql/pull/4997 + +**Author:** @ritvibhatt + +**Created:** 2025-12-24T23:43:31Z + +**State:** MERGED + +**Merged:** 2026-01-09T17:29:20Z + +**Changes:** +4354 -3422 (60 files) + +**Labels:** `documentation`, `enhancement` + + +## Description + +### Description +- Apply feedback from technical writers from documentation-website PR (https://github.com/opensearch-project/documentation-website/pull/11688) to SQL repo to keep documentation in sync +- Update script to auto-convert codeblock tables to markdown tables, support single directory export, remove empty tables, improve Jekyll angle bracket/asterisk escaping, convert markdown emphasis to Jekyll attributes +- Add README with SOP for docs exporter script + +When the docs exporter script is applied now, the differences from the docs website are: links to sections not yet ported to documentation-website are commented out in the docs website (functions, admin), whitespace differences, different nav order for some command 
files, and some differences in escape characters for angle brackets and asterisks. Everything except the commented out admin links will be addressed in the next documentation-website PR (https://github.com/opensearch-project/documentation-website/pull/11747). + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @kylehounslow + + +Thanks @ritvibhatt! Does this PR enable zero diff when re-exporting back to `documentation-website`? Or are there manual changes that need to happen? + + +### @ritvibhatt + + +> When the docs exporter script is applied now, the differences from the docs website are: links to sections not yet ported to documentation-website are commented out in the docs website (functions, admin), whitespace differences, different nav order for some command files, and some differences in escape characters for angle brackets and asterisks. Everything except the commented out admin links will be addressed in the next documentation-website PR ([opensearch-project/documentation-website#11747](https://github.com/opensearch-project/documentation-website/pull/11747)). 
+ +@kylehounslow It still has a few differences right now: +- links to sections not yet ported to documentation-website are commented out in the docs website (functions, admin) +- some whitespace differences and different nav order for some command files (looks like documentation website skips some numbers for nav order) +- some differences with adding escape characters for angle brackets and asterisks (causing some problems with examples rendering in documentation website) +Raised another PR for the documentation website that should address everything except the commented out admin links: https://github.com/opensearch-project/documentation-website/pull/11747. + + + +--- + +# PR #4996: Sync up this path publish-async-query-core.yml from main to 2.19-dev and also changing the JAVA to 17 + +**URL:** https://github.com/opensearch-project/sql/pull/4996 + +**Author:** @Parasjg + +**Created:** 2025-12-24T10:13:26Z + +**State:** MERGED + +**Merged:** 2026-01-06T17:46:57Z + +**Changes:** +17 -7 (2 files) + + +## Description + +Sync up this path publish-async-query-core.yml from main to 2.19-dev and also changing the JAVA to 17 + +### Description +Sync up this path publish-async-query-core.yml from main to 2.19-dev and also changing the JAVA to 17 + +### Related Issues +Resolving issue related to java version when updating the async-query-core jars in DQS + +### Check List +- [NA] New functionality includes testing. +- [NA] New functionality has been documented. + - [NA] New functionality has javadoc added. + - [NA] New functionality has a user manual doc added. +- [NA] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) all confirmed. +- [NA] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [NA] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). 
+ +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @ahkcs - APPROVED + + +LGTM + + +## Review Comments + + +### @LantaoJin on `.github/workflows/publish-async-query-core.yml:38` + + +shouldn't be 11 for 2.19? + + +### @Parasjg on `.github/workflows/publish-async-query-core.yml:38` + + +We connected with Kai Huang , he suggested 17 could work . Do let me know if we can't go ahead with this one. + + +### @ahkcs on `.github/workflows/publish-async-query-core.yml:38` + + +We want to use JDK 17 for async query core jar here and main branch doesn't support 17, that's why I suggested them to backport the maven snapshot upload logic to 2.19-dev branch with JDK 17 supported, please let me know if there's any concerns for this + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4995: [Backport 2.19-dev] Dedup pushdown (TopHits Agg) should work with Object fields (#4991) + +**URL:** https://github.com/opensearch-project/sql/pull/4995 + +**Author:** @LantaoJin + +**Created:** 2025-12-24T09:07:32Z + +**State:** MERGED + +**Merged:** 2025-12-26T07:31:16Z + +**Changes:** +53 -27 (6 files) + + +## Description + +(cherry picked from #4991 commit 1192376a7b0856375b3dbea6c54ed3420e593b7d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4993: Support enumerable TopK + +**URL:** https://github.com/opensearch-project/sql/pull/4993 + +**Author:** @LantaoJin + +**Created:** 2025-12-23T12:14:09Z + +**State:** MERGED + +**Merged:** 2025-12-29T06:43:07Z + +**Changes:** +334 -217 (39 files) + 
+**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Support enumerable TopK, check #4982 for issue details. + +- EnumerableTopKConverterRule + - Convert `LogicalSort` with `fetch` to `CalciteEnumerableTopK` +- EnumerableTopKMergeRule + - Merge `EnumerableLimit` and `EnumerableSort` to `CalciteEnumerableTopK` + + +The `CalciteEnumerableTopK` is derived from `EnumerableLimitSort` which has the corrected cost computation. + +### Related Issues +Resolves #4982 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - DISMISSED + + +LGTM + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableTopKRule.java:None` + + +Do we need copy the license from Calcite? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +change `{"field":"state.keyword","missing_bucket":false,"order":"asc"}` to`{"field":"state.keyword","missing_bucket":false,"order":"desc"}` + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml:None` + + +ditto + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableTopKRule.java:None` + + +No, I think. it's not 100% copy + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java:43` + + +Can `ENUMERABLE_TOP_K_RULE` be eliminated with `ENUMERABLE_TOP_K_MERGE_RULE`? 
+ +It seems the former converts limit+sort from logical to enumerable top-k, while the latter converts limit+sort from enumerable ones to enumerable top-k. But the former will be converted to limit+sort enumerable plan if without the converter rule. The merge rule seem to cover this case + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java:43` + + +No before #4992 , let me try it now, will remove it if no explain case fails + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java:43` + + +Thanks 4992, removing EnumerableTopKConverterRule works now. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:440` + + +[question] Curious when and how could it happens with this change? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +Shall we prevent push down `CalciteEnumerableTopK` in `SortIndexScanRule`? I think we should only push down an `EnumerableSort` for a physical level plan. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableTopK.java:20` + + +Missing Override `copy` method. If not, this class will downgrade to its parent later when do copy. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:440` + + +The issue seems only happens without pruning. Now I cannot reproduce it. But for multisearch test `testExplainMultisearchTimestampInterleaving`, we can remove the second duplicated LIMIT pushdown: `LIMIT->5, LIMIT->5`. We can keep this logic. 
+ + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableTopK.java:20` + + +added + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableTopK.java:20` + + +> Missing Override `copy` method. If not, this class will downgrade to its parent later when do copy. + +@coderabbitai This is a good finding. Can you learn something from it? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +fixed + + +## General Comments + + +### @LantaoJin + + +Let's suspend on merging until https://github.com/opensearch-project/sql/pull/4992 merged + + +--- + +# PR #4992: Prune old in operator push down rules + +**URL:** https://github.com/opensearch-project/sql/pull/4992 + +**Author:** @qianheng-aws + +**Created:** 2025-12-23T08:45:47Z + +**State:** MERGED + +**Merged:** 2025-12-26T07:30:22Z + +**Changes:** +451 -384 (110 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR primally supports pruning the old operator if we get a better plan than before. Therefore, it will improve the efficiency of planning process by avoid exploring meaningless equivalent plans. + +The performance gain is shown below: +1. Average query cost on big5 and clickbench: + +  | previous | pruneOld +-- | -- | -- +big5 | 17ms | 14ms +clickbench | 26 ms | 19ms + +2. Average optimization cost on big5 and clickbench (By enable calcite's debug mode and adding timingTracer, it will induce more time cost than before) + + +  | previous | pruneOld +-- | -- | -- +big5 | 36.3 ms | 25.9 ms +clickbench | 265 ms | 110.9 ms + +3. 
Average number of applied rules on big5 and clickbench: + + +  | previous | pruneOld +-- | -- | -- +big5 | 114 | 50 +clickbench | 474 | 215 + +Some positive cases on plan with this PR: +- testDedupRename +- testCasePushdownAsRangeQueryExplain +- testExplainOnAggregationWithFunction +- testDedupExpr + +### Implementation Details + +As described in https://github.com/opensearch-project/sql/issues/4931#issuecomment-3645518565, there is also many issues and bug spotted after pruning old. So there is additional change to fix them and make it compatible: + +- As there is Subset reuse in Calcite, pruning a Subset which is the only child of other Subset will cause preparing failure. So we should prune the old from top to down and stop if the current node cannot be pruned. One node cannot be pruned if it's physical node(see the point5 in the above comment) or it has multiple parents(except the root of the call, as we are generating a new root to replace it). +- Make `PPLAggregateConvertRule`, `PPLAggGroupMergeRule`, 'RareTopPushdownRule', 'DedupPushDownRule' implements `SubstitutionRule` so they will get higher priority on rule match and then we can get optimized aggregates in RelSubset before pruning. +- Support removing `project`, `sort` and agg derived `filter` when doing aggregate push down. +- Slightly refactor `AggregateIndexScanRule` so it can support pushing down on more cases +- Refactor and simplify `DedupPushDownRule` so it can get compatible with the current pruning mechanism +- Continue pushing down limit if it can reduce the estimated row count. +- Fix several bugs in `AggregateAnalyzer` when the project is null. See UT in `AggregateAnalyzerTest`, its expected results is wrong before. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4931 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +I'd like to verify the checksum of clickbench result before being merged + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:746` + + +How does the condition `volcanoPlanner.getSubsetNonNull(rel).getParentRels().size() == 1` guarantee that the rel node's parent is pruned? + +Is it because that if the index is greater than 0, then index 0 must have been traversed? + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:36` + + +Why do some rules implement `SubstitutionRule`, while some don't? + + + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:47` + + +should be wrapped in `InterruptibleRelRule`? +``` +public class PPLAggGroupMergeRule extends InterruptibleRelRule implements SubstitutionRule +``` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:107` + + +how about move `tryPruneRelNodes(call)` into `InterruptibleRelRule`? 
+``` + onMatchImpl(call); ++ if (this instanceof SubstitutionRule) { ++ tryPruneRelNodes(call); ++ } +``` + + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml:11` + + +Critical: the original query is `count() by UserID, SearchPhrase`, the order matter for results + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:208` + + +I'm curious why can this rule be safely eliminated + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml:11` + + +ditto + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml:10` + + +Critical: Seems a bad case? the group key is an expression which should be pushed to script agg pushdown + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:36` + + +Good question! We need to update dev-doc to explain how to choose proper base rule. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:746` + + +Yes, if the current RelNode has only 1 parent, it must be the RelNode in `call.rels[current_offset - 1]` and it has been pruned in the previous step of this stream + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:746` + + +The best way is looking up its all parents in `VolcanoPlanner.prunedNodes` while its unaccessible + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:208` + + +I refactor the dedup push down rule by unifying 2 rules into 1. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:36` + + +Basically, we can only add `SubstitutionRule` if we can ensure it's better than the previous ones. 
As said in Calcite's comment for `SubstitutionRule` + +> A rule that implements this interface indicates that the new RelNode is typically better than the old one. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:107` + + +As discussed offline, `prune old` shouldn't forcefully be bound to `SubstitutionRule`. Keep flexibility currently. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml:10` + + +This is a good case of applying `PPLAggGroupMergeRule`. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml:11` + + +In SQL, `group by a, b` is equivalent to `group by b, a`, many logic follows this principle in Calcite. And this is case is affected by `AggregateProjectMergeRule` in Calcite. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:186` + + +add `private` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:190` + + +ditto + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:47` + + +InterruptibleRelRule is in package `opensearch` and has dependency on `OpenSearchTimeoutException` while this rule is package `core`. + +Therefore, we cannot make this extends `InterruptibleRelRule` unless move that from package `opensearch` to `core` and add library `opensearch` in core gradle. + +On the other hand, if there is interrupt triggered in planning process, it should be detected in our push down rules in package `opensearch`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:186` + + +We should give the methods in `AggregateBuilderHelper` package level accessibility. 
I see all methods in this class are using default symbol + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:12` + + +It's odd why this IT passed with `{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}` + +It should be `desc` + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:12` + + +ASC is the default order. SORT is removed as it’s before DEDUP + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4991: Dedup pushdown (TopHits Agg) should work with Object fields + +**URL:** https://github.com/opensearch-project/sql/pull/4991 + +**Author:** @LantaoJin + +**Created:** 2025-12-23T08:03:13Z + +**State:** MERGED + +**Merged:** 2025-12-24T07:26:59Z + +**Changes:** +53 -27 (6 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +#4844 converted `dedup` to TopHits Agg. But failed to parse dedup column if the column is a child of Object field. +#4360 restored the internal primitive value in a Map for Aggregates (first, last, min, max) which stored these Map objects in their accumulators.(first, last, min, max) stored these Map objects in their accumulators. But this fixing was not necessary since #4844 fixed them in other way. + +In this PR: +1. fix the bug of get the dedup column names +2. revert #4360 + +### Related Issues +Resolves #4990 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @aalva500-prog on `integ-test/src/test/resources/expectedOutput/calcite/big5/dedup_metrics_size_field.yaml:1` + + +Hi @LantaoJin, thank you for the changes. The `dedup` command now works, but looks like the query execution plan changed. Looks like it now includes a PROJECT pushdown optimization: + +``` +curl -X POST "localhost:9200/_plugins/_ppl/_explain" \ +-H "Content-Type: application/json" \ +-d '{ +"query": "source = big5 | dedup metrics.size | sort - @timestamp" +}' +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$7], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], data_stream=[$18], host=[$22], metrics=[$24], aws=[$27], event=[$32])\n LogicalSort(sort0=[$17], dir0=[DESC-nulls-last])\n LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], data_stream=[$18], data_stream.dataset=[$19], data_stream.namespace=[$20], data_stream.type=[$21], host=[$22], host.name=[$23], metrics=[$24], metrics.size=[$25], metrics.tmin=[$26], aws=[$27], aws.cloudwatch=[$28], aws.cloudwatch.ingestion_time=[$29], aws.cloudwatch.log_group=[$30], aws.cloudwatch.log_stream=[$31], event=[$32], event.dataset=[$33], event.id=[$34], event.ingested=[$35], _id=[$36], _index=[$37], _score=[$38], _maxscore=[$39], _sort=[$40], _routing=[$41])\n LogicalFilter(condition=[<=($42, 1)])\n LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], 
cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], data_stream=[$18], data_stream.dataset=[$19], data_stream.namespace=[$20], data_stream.type=[$21], host=[$22], host.name=[$23], metrics=[$24], metrics.size=[$25], metrics.tmin=[$26], aws=[$27], aws.cloudwatch=[$28], aws.cloudwatch.ingestion_time=[$29], aws.cloudwatch.log_group=[$30], aws.cloudwatch.log_stream=[$31], event=[$32], event.dataset=[$33], event.id=[$34], event.ingested=[$35], _id=[$36], _index=[$37], _score=[$38], _maxscore=[$39], _sort=[$40], _routing=[$41], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $25)])\n LogicalFilter(condition=[IS NOT NULL($25)])\n CalciteLogicalIndexScan(table=[[OpenSearch, big5]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$7], dir0=[DESC-nulls-last])\n CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, data_stream, host, metrics, metrics.size, aws, event], AGGREGATION->rel#2183:LogicalAggregate.NONE.[](input=LogicalProject#2181,group={0},agg#0=LITERAL_AGG(1))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"data_stream\",\"host\",\"metrics\",\"metrics.size\",\"aws\",\"event\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":10000,\"sources\":[{\"metrics.size\":{\"terms\":{\"field\":\"metrics.size\",\"missing_bucket\":false,\"order\":\"asc\"}}}]},\"aggregations\":{\"$f1\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"metrics.size\",\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"data_stream\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\":[]},\"script_fields\":{}}}}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)])\n" + } +``` + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/big5/dedup_metrics_size_field.yaml:1` + + +> Looks like it now includes a PROJECT pushdown optimization + +Did you run on the latest code? I didn't see the project pushdown action in the explain output. + + +## General Comments + + +### @LantaoJin + + +cc @aaarone90 +cc @ahkcs can you help to confirm the current fixing could address the issue of https://github.com/opensearch-project/sql/issues/4359 as long as the `CalcitePPLAggregationIT` passed? + + +--- + +# PR #4985: [AUTO] Increment version to 2.19.5-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/4985 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-22T21:36:43Z + +**State:** MERGED + +**Merged:** 2026-01-07T16:34:53Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `v2.19.5` + + +## Description + +- Incremented version to **2.19.5-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +Version bump, Ignore IT + + +--- + +# PR #4983: Support profile options for PPL - Part I Implement phases level metrics. + +**URL:** https://github.com/opensearch-project/sql/pull/4983 + +**Author:** @penghuo + +**Created:** 2025-12-22T19:45:12Z + +**State:** MERGED + +**Merged:** 2026-01-09T02:31:35Z + +**Changes:** +655 -11 (23 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +- Introduce query profiling framework with profile contexts, metrics, and thread-local lifecycle helpers. +- Propagate PPL `profile` flag through transport and request parsing, enforce supported formats/paths, and document profiling usage. 
+- Capture per-phase metrics across planning, optimization, execution, and response formatting; add profiling teardown and update related tests/expectations. +- Doc, https://github.com/penghuo/os-sql/blob/ee0d477cd56cdeddec6da15a334b82f606245ac5/docs/user/ppl/interfaces/endpoint.md#profile + +### Related Issues +https://github.com/opensearch-project/sql/issues/4294 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @qianheng-aws - COMMENTED + + +@penghuo Is this profile metric safe for parallelism updating? For `OPENSEARCH_TIME`, it may be updated by multiple threads if there is join or union. + + +### @LantaoJin - CHANGES_REQUESTED + + +above + + +## Review Comments + + +### @penghuo on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.profile.yml:None` + + +ignore it. + + +### @dai-chen on `docs/user/ppl/interfaces/endpoint.md:None` + + +Any future plan to include DSL profile output for each stage? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:43` + + +Is atomic operation required here? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:205` + + +Just found this doesn't call listener properly which may cause profile context leak? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:150` + + +This is to avoid NPE somewhere? 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:205` + + +Cleanup in [TransportPPLQueryAction](https://github.com/opensearch-project/sql/pull/4983/changes#diff-fd5cf7d596c746360ff54c37c1bd21bee320ee21ace49a03f1a5617b68150685R209) + + +### @penghuo on `docs/user/ppl/interfaces/endpoint.md:None` + + +No plan yet, Let me as experimental for profile option. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:150` + + +yes, explain endpoint does not support profile. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:43` + + +not necessary. Fix in https://github.com/opensearch-project/sql/pull/4983/changes/820d75783fcc9b1ab14f9366bd42dbc9b6929d9e + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java:None` + + +Should use `add` here since we support pagination for aggregate and this code will be called multiplet-times? + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/monitor/profile/MetricName.java:9` + + +Do we need PARSE_TIME, though it should be tiny? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:None` + + +Should be AtomicLong? We have multi-thread usage in opensearch_time + + +### @LantaoJin on `docs/user/ppl/interfaces/endpoint.md:None` + + +I'd like make all the metrics lower case which to align with OpenSearch DSL profile + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/monitor/profile/QueryProfiling.java:19` + + +We search with multiple threads on JOIN/Subsearch queries. We the total time should sum them. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/MetricName.java:9` + + +I ignore it for now, should be minial. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:None` + + +Add back. 
+ + +### @penghuo on `docs/user/ppl/interfaces/endpoint.md:None` + + +done. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/QueryProfiling.java:19` + + +see comments. https://github.com/opensearch-project/sql/pull/4983#issuecomment-3726499582 + + +## General Comments + + +### @penghuo + + +> @penghuo Is this profile metric safe for parallelism updating? For `OPENSEARCH_TIME`, it may be updated by multiple thread if there is join or union. +> We search with multiple threads on JOIN/Subsearch queries. We the total time should sum them. + +Agree. I separated this into 2 PRs. +The final output includes Phases (current PR), and includes a Plan section to include operator level metrics PostgreSQL style (2nd PR). +``` +"profile": { + "summary": { + "total_time_ms": 123.4 + }, + "phases": { + "analyze": { "time_ms": 4.8 }, + "optimize": { "time_ms": 8.3 }, + "execute": { "time_ms": 95.0 }, + "format": { "time_ms": 14.1 } + }, + "plan": { + "node": "Result", + "time_ms": 14.0, + "rows": 1000, + "children": [ + { + "node": "HashJoin", + "time_ms": 60.0, + "rows": 1000, + "children": [ + { "node": "Scan(index_a)", "time_ms": 30.0}, + { "node": "Scan(index_b)", "time_ms": 28.0} + ] + } + ] + } +} +``` + + + +--- + +# PR #4981: Remove GetAlias Call + +**URL:** https://github.com/opensearch-project/sql/pull/4981 + +**Author:** @aparajita31pandey + +**Created:** 2025-12-22T04:37:12Z + +**State:** MERGED + +**Merged:** 2026-01-09T17:44:55Z + +**Changes:** +43 -9 (2 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +This diff removes a redundant alias-resolution call via the `GET /_alias/` API that requires the caller to have extra indices:admin/aliases/get privileges, which can cause permission issues when executing read queries. + +It instead leverages existing `GetFieldMapping` call that works for both index and alias. 
+ +Tested Functionality +#### With Index Name +``` +curl -X POST "localhost:9200/_plugins/_sql" -H 'Content-Type: application/json' \ + -d '{ + "query": "SELECT * FROM my-index", + "fetch_size": 1, + "filter": { + "term": { + "another_field": "hello" + } + } + }' +Output - +{ + "schema": [ + { + "name": "new_field", + "type": "text" + }, + { + "name": "another_field", + "type": "keyword" + } + ], + "total": 1, + "datarows": [[ + "Some text", + "hello" + ]], + "size": 1, + "status": 200 +} +``` + +#### With Alias Name - Same Output +``` +curl -X POST "localhost:9200/_plugins/_sql" -H 'Content-Type: application/json' \ + -d '{ + "query": "SELECT * FROM my-alias", + "fetch_size": 1, + "filter": { + "term": { + "another_field": "hello" + } + } + }' +Output - +{ + "schema": [ + { + "name": "new_field", + "type": "text" + }, + { + "name": "another_field", + "type": "keyword" + } + ], + "total": 1, + "datarows": [[ + "Some text", + "hello" + ]], + "size": 1, + "status": 200 +} +``` +### Related Issues +Resolves +#2960, https://github.com/opensearch-project/security/issues/5871 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +Ideally this should have an integ test, but nothing wrong with the impl + + +### @aalva500-prog - APPROVED + + +LGTM, but agree with @Swiddis that some test would be ideal. + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +## Review Comments + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java:None` + + +Could you double check if multiple entires can be returned in certain case? 
Just want to make sure this check is not too strict and fails in a valid case. + +``` +# Create test-index-1, 2, 3 +curl -X PUT "localhost:9200/test-index-1" -H 'Content-Type: application/json' -d' +{ + "mappings": { + "properties": { + "name": { "type": "text" }, + "age": { "type": "integer" }, + "timestamp": { "type": "date" } + } + } +}' + +... + +curl -X POST "localhost:9200/_aliases" -H 'Content-Type: application/json' -d' +{ + "actions": [ + { "add": { "index": "test-index-*", "alias": "wildcard-alias" } } + ] +}' + +curl -X GET "localhost:9200/wildcard-alias/_mapping?pretty" +{ + "test-index-1" : { + ... + }, + "test-index-2" : { + ... + }, + "test-index-3" : { + ... +} +``` + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java:None` + + +Hi @aparajita31pandey, could you take a look at this? thanks! + + +### @aparajita31pandey on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java:None` + + +@dai-chen Thank you for pointing this out. I have updated the code and have added similar `integ-test` around it. Please have a look. + + +## General Comments + + +### @aparajita31pandey + + +@LantaoJin Can I please get a review? + + +### @Swiddis + + +Re: integ tests, I have https://github.com/opensearch-project/sql/pull/5008 which lays a lot more groundwork for adding more permissions-related tests to our codebase. + +Since this PR was opened before that existed, I won't block on it -- after both PRs merge I'll write a small task for myself to add a test for this. + + +### @aparajita31pandey + + +@Swiddis @aalva500-prog I have added a small integ test for this change. Can I get a re-review? 
+ + +--- + +# PR #4979: Support nested aggregation when calcite enabled + +**URL:** https://github.com/opensearch-project/sql/pull/4979 + +**Author:** @LantaoJin + +**Created:** 2025-12-19T09:59:29Z + +**State:** MERGED + +**Merged:** 2026-01-05T02:46:24Z + +**Changes:** +1181 -215 (43 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Refactor implementation for PPL: https://github.com/opensearch-project/sql/pull/3696 (closed) +Deprecated implementation for SQL: https://github.com/opensearch-project/sql/pull/2814 (closed) + +Support [nested aggregation](https://docs.opensearch.org/docs/latest/aggregations/bucket/nested/) in PPL only when calcite enabled. + +With this PR, the following PPL query is able to execute a nested aggregation query. +```SQL +source=logs | head 10000 | stats min(pages.load_time) +``` +And it equals the DSL +``` +GET logs/_search +{ + "aggs": { + "pages": { + "nested": { + "path": "pages" + }, + "aggs": { + "min_load_time": { "min": { "field": "pages.load_time" } } + } + } + } +} +``` + +The following queries (group-by nested path) are supported with this PR as well: +``` +source=test | top pages.load_time +source=test | stats count() by pages.load_time +source=test | dedup pages.load_time +``` + +**Limitation:** +- PPL only +- Calcite should be enabled +- Throw **UnsupportedOperationException** if pushdown cannot be applied.
+- The following queries (group-by nested root path) are supported with this PR without pushdown enhancement: +``` +source=test | top pages +source=test | stats count() by pages +source=test | dedup pages +``` + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4949, https://github.com/opensearch-project/sql/issues/4564, https://github.com/opensearch-project/sql/issues/2813 and https://github.com/opensearch-project/sql/issues/2529, and https://github.com/opensearch-project/sql/issues/2739 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:179` + + +After upgrading to 1.41, this method is no longer called; change it to `createPrepare()` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:301` + + +This hook was called twice for non-full-scannable plan + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintStrategyTable.java:None` + + +rename `stats_args` to `agg_args` since aggregation does not only happen in the `stats` command. + + +### @LantaoJin on `docs/user/ppl/interfaces/endpoint.md:114` + + +avoid whole-plan-pushdown + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java:39` + + +For some exception without cause (root exception), the actual root cause may be attached in its suppressed cause.
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java:28` + + +Prefer to display the root cause instead of wrapping exception. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:241` + + +To avoid ambiguity, we call the multiple sub-aggregations `structured` aggregations instead of `nested` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:367` + + +Here the value could be `min(a.b), count(c)`, rename `aggFieldNames` to `aggNames`. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4978: [Backport 2.19-dev] Support pushdown dedup with expression (#4957) + +**URL:** https://github.com/opensearch-project/sql/pull/4978 + +**Author:** @LantaoJin + +**Created:** 2025-12-19T07:03:33Z + +**State:** MERGED + +**Merged:** 2025-12-22T02:17:24Z + +**Changes:** +645 -170 (39 files) + + +## Description + +(cherry picked from #4957 commit cbcdbd6fc918e4a356480300c208aa76f468fbf1) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4976: Add Frequently Used Big5 PPL Queries + +**URL:** https://github.com/opensearch-project/sql/pull/4976 + +**Author:** @aalva500-prog + +**Created:** 2025-12-18T22:30:01Z + +**State:** MERGED + +**Merged:** 2026-01-06T18:58:48Z + +**Changes:** +157 -2 (7 files) + +**Labels:** `testing`, `backport 2.19-dev` + + +## Description + +### Description +This PR continues the work done in PR #4816 to add frequently used queries to the big5 workload based on gap analysis between existing benchmarks and frequently used query patterns. + +`dedup` query is added here: #4991 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +can you resolve the conflicts + + +## Review Comments + + +### @anasalkouz on `integ-test/src/test/resources/big5/queries/rex_regex_transformation.ppl:25` + + +nit: maybe explain what this query does + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +Any reason to do correctness check for this one? I thought this IT is only for benchmark? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:741` + + +This PR is only to add more test queries right? Why did the behavior change? + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:741` + + +The behavior changed because the file `integ-test/src/test/resources/big5/data/big5.json` was modified and now has more data. + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +This PR is to continue the work done here by @noCharger: https://github.com/opensearch-project/sql/pull/4816. I don't have the whole context, unfortunately. However, I think it is not only for benchmark, as the same was done for the `dedup` command in this PR: https://github.com/opensearch-project/sql/pull/4991 + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +But other test methods (except dedup) only do timing without this assertion? My understanding is this IT shouldn't do correctness check. + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +Removed correctness check for rex command.
+ + +## General Comments + + +### @aalva500-prog + + +@Swiddis the SQL CLI Integration tests are failing, any recommendations on how to fix this? + + +--- + +# PR #4974: Add unified query compiler API + +**URL:** https://github.com/opensearch-project/sql/pull/4974 + +**Author:** @dai-chen + +**Created:** 2025-12-18T17:59:34Z + +**State:** MERGED + +**Merged:** 2026-01-07T18:01:02Z + +**Changes:** +554 -42 (10 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces a `UnifiedQueryCompiler` as part of the Unified Execution Runtime, enabling direct evaluation of PPL queries via a reference implementation. It completes the third pillar of the Unified Query API (alongside unified query planner and transpiler) and allows external consumers to execute PPL end-to-end using a Calcite-based in-memory evaluator, as described in #4782. + +**Key Changes** + +- `UnifiedQueryCompiler`: Introduces a new API that compiles Calcite logical plans into executable JDBC statements. +- `UnifiedQueryContext` lifecycle management: Implements AutoCloseable to properly manage resource lifecycle. +- Integration tests: Adds end-to-end integration tests demonstrating the complete workflow from context creation, query planning, compilation, and execution. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4894 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `api/README.md:121` + + +UnifiedQueryCompiler.compile() compiles a RelNode into an executable query plan by leveraging Calcite’s Enumerable physical operators? + + +### @dai-chen on `api/README.md:121` + + +Yes, it's the same as current PPL Calcite logic in core module. Thanks! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4971: [Backport 2.19-dev] Add scalar min/max to BuiltinFunctionName (#4967) + +**URL:** https://github.com/opensearch-project/sql/pull/4971 + +**Author:** @LantaoJin + +**Created:** 2025-12-18T02:13:17Z + +**State:** MERGED + +**Merged:** 2025-12-18T06:52:23Z + +**Changes:** +14 -7 (4 files) + + +## Description + +(cherry picked from #4967 commit 7dfabcea94952ead0a27463d810097d74446acc4) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4970: [Backport 2.19-dev] Extract unified query context for shared config management + +**URL:** https://github.com/opensearch-project/sql/pull/4970 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-17T22:54:42Z + +**State:** MERGED + +**Merged:** 2025-12-18T02:14:00Z + +**Changes:** +343 -215 (7 files) + + +## Description + +Backport 297074c1ed595c9d2e6ec34b6cca8ad6a247d0ea from #4933. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4967: Add scalar min/max to BuiltinFunctionName + +**URL:** https://github.com/opensearch-project/sql/pull/4967 + +**Author:** @LantaoJin + +**Created:** 2025-12-17T05:59:15Z + +**State:** MERGED + +**Merged:** 2025-12-18T02:09:50Z + +**Changes:** +14 -7 (4 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Add scalar min/max to BuiltinFunctionName. + +Since parsing min/max aggregation function and scalar function (eval function) in AST parser are separated, we can use different names in `BuiltinFunctionName` with no changes in PPL interface/syntax. + +`eval a = max(b)` -> SCALAR_MAX +`stats max(b)/eventstats max(b)/streamstats max(b)` -> (AGG) MAX + +### Related Issues +Resolves #4774 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2908` + + +Can you please kindly add a javadoc on `BuiltinFunctionName.of` to instruct future developers to use `BuiltinFunctionName.ofAggregation` for aggregation functions? 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4965: [Backport 2.19-dev] Support `mvmap` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4965 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-16T22:21:56Z + +**State:** MERGED + +**Merged:** 2025-12-17T02:34:10Z + +**Changes:** +499 -11 (15 files) + + +## Description + +Backport 11727a488a77ae392ef0f9da4bbde601937ffc5f from #4856. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4964: [Backport 2.19-dev] Feature addtotals and addcoltotals + +**URL:** https://github.com/opensearch-project/sql/pull/4964 + +**Author:** @dai-chen + +**Created:** 2025-12-16T22:12:48Z + +**State:** MERGED + +**Merged:** 2025-12-17T00:26:46Z + +**Changes:** +2313 -1 (25 files) + +**Assignees:** @dai-chen + + +## Description + +Backport 15e2411aba434f39d9b0c0d57e0f9b2e1b6f4c87 from #4754 with integration test fix by enabling Calcite. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4963: [Backport 2.19-dev] issue #4514 tonumber function as part of roadmap #4287 + +**URL:** https://github.com/opensearch-project/sql/pull/4963 + +**Author:** @dai-chen + +**Created:** 2025-12-16T19:28:01Z + +**State:** MERGED + +**Merged:** 2025-12-17T00:26:13Z + +**Changes:** +688 -2 (11 files) + +**Assignees:** @dai-chen + + +## Description + +Backport 342a78be6b83fb36df839cc840bb90b687a4751e from #4605 with additional integration test fix by enabling Calcite. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4962: Update CodeRabbit instructions + +**URL:** https://github.com/opensearch-project/sql/pull/4962 + +**Author:** @ykmr1224 + +**Created:** 2025-12-15T21:27:08Z + +**State:** MERGED + +**Merged:** 2025-12-17T23:04:44Z + +**Changes:** +95 -4 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Update CodeRabbit instructions based on #4497, #4605, #4675 + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4889 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4961: [Backport 2.19-dev] Support `mvzip` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4961 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-15T21:08:05Z + +**State:** MERGED + +**Merged:** 2025-12-16T20:39:56Z + +**Changes:** +457 -1 (10 files) + + +## Description + +Backport 52a691a7e63c03f7acf7b1df0f118a3952257f95 from #4805. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4958: Escape underscore character in documentation for LIKE + +**URL:** https://github.com/opensearch-project/sql/pull/4958 + +**Author:** @Blitheness + +**Created:** 2025-12-15T14:06:24Z + +**State:** MERGED + +**Merged:** 2025-12-15T21:21:44Z + +**Changes:** +3 -3 (1 files) + +**Labels:** `documentation`, `PPL` + + +## Description + +### Description +Properly escape underscore character in documentation for LIKE so it doesn't render as italicised text. + +### Related Issues +NA + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +Thanks! 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4957: Support pushdown dedup with expression + +**URL:** https://github.com/opensearch-project/sql/pull/4957 + +**Author:** @LantaoJin + +**Created:** 2025-12-15T10:18:02Z + +**State:** MERGED + +**Merged:** 2025-12-19T06:41:38Z + +**Changes:** +655 -170 (39 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Support pushdown `dedup` with expression: +- the dedup columns contain expressions + - `| eval new_gender = lower(gender), new_name = lower(name) | dedup 2 new_gender, new_name` +- the other columns contain expressions + - `| eval new_gender = lower(gender), new_name = lower(name) | dedup 2 gender, name` + +This PR also implicitly supports pushdown `join with max option` with expressions +- the join keys contain expressions + - `source = t1 | eval new_gender = lower(gender) | join new_gender [source = t2 | eval new_gender = lower(gender) ]` +- the other columns contain expressions + - `source = t1 | eval new_gender = lower(gender) | join gender [source = t2 | eval new_gender = lower(gender) ]` + +### Related Issues +Resolves #4789 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +nit: use `var` or `Map` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:91` + + +Seems the comment also needs to be updated, as it no longer returns source but fields + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +Can you explain the intuition / reason for reordering the columns? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +That's why I suggested you to use aggregateWithTrimming() instead of calling the relBuilder.aggregate() directly. In SQL, the Project added for Aggregate always keeps the group columns in the front of others, it means the group keys set always contain {0}, or it could trigger a Calcite bug. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:91` + + +It still returns source, the field will be added when scripts exist. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:94` + + +Seems this can be simplified to +``` + dedupColumnIndices.stream() + .map(projectWithWindow.getInput().getRowType().getFieldNames()::get) + .toList() +``` + + + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:96` + + +Will this ever happen since dedupColumnNames is derived from dedupColumnIndices? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +I don't get it as well.
Since the `aggregate` is going to be pushed into scan and the final generated RelNode is a single `Scan` operator, will it really trigger the bug in Calcite? + +Do we do similar thing in the previous PR for supporting dedup push down? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +There is already `import org.apache.commons.lang3.tuple.Pair` imported. Could it be switched to that or at least unified? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +sure. will convert to common.lang3's Pair + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +The definition is in L93 `.>map(`. But yes, I can add `Map` here and remove L93. The reason use L93 instead of `Map` is we used unnamed variable, but now it has to define a name. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:94` + + +No. above code doesn't cover the rename case: +``` +source=test | rename status as http_status | dedup http_status | fields http_status | sort http_status +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:96` + + +I am 100% sure they have same size. Better to keep this check. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +> I don't get it as well. Since the `aggregate` is going to be pushed into scan and the final generated RelNode is a single `Scan` operator, will it really trigger the bug in Calcite? +> +> Do we do similar thing in the previous PR for supporting dedup push down? + +Yes. I see bugs when the group key columns are not in the front of child Project in Calcite in developing support Aggregate with Calcite. Not sure is it fixed or not. 
Check the comment here https://github.com/opensearch-project/sql/blob/7dfabcea94952ead0a27463d810097d74446acc4/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java#L1049 + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +That happens in CalciteRelNodeVisitor, but is similar change needed in the push down process? + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4956: [Backport 2.19-dev] Pushdown join with `max=n` option to TopHits aggregation (#4929) + +**URL:** https://github.com/opensearch-project/sql/pull/4956 + +**Author:** @LantaoJin + +**Created:** 2025-12-12T07:24:55Z + +**State:** MERGED + +**Merged:** 2025-12-12T08:15:42Z + +**Changes:** +168 -130 (27 files) + + +## Description + +(cherry picked from #4929 commit 4bf5c9c776e7f8cb11714d68fbc2c9163475ef23) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4955: remove shadow jar + +**URL:** https://github.com/opensearch-project/sql/pull/4955 + +**Author:** @xinyual + +**Created:** 2025-12-12T06:18:09Z + +**State:** MERGED + +**Merged:** 2025-12-15T02:17:54Z + +**Changes:** +2 -69 (2 files) + +**Labels:** `dependencies`, `maintenance` + + +## Description + +### Description +We try to remove shadow jar since access control now is already removed in core. +Revert the change in PR https://github.com/opensearch-project/sql/pull/3447 + +Also fix an IT due to the import name changed from +shaded.com.google -> com.google + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +It confused me why a plan changed by your PR + + +### @xinyual on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +After decode, the script previously using +`import shaded.com.google...` +The `shaded` here is caused by shade jar. +But now we remove it, so it is +`import com.google...` +So the script changed. + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +can you run explain command with `extended` and attach the result here + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +oh, it's a v2 plan. not sure how to decode the base64 string. 
+ + +### @xinyual on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +Using online decode and here are the result: +using shadow jar + +�^Esr�6org.opensearch.sql.expression.parse.PatternsExpressionΪG^F^B�^BZ�^PuseCustomPatternL�^Gpatternt�^YLjava/util/regex/Pattern;xr�3org.opensearch.sql.expression.parse.ParseExpression +} ^B�^DL� +identifiert�*Lorg/opensearch/sql/expression/Expression;L� +identifierStrt�^RLjava/lang/String;L�^Gpatternq�~�^CL� sourceFieldq�~�^Cxr�0org.opensearch.sql.expression.FunctionExpression*0uj{^B�^BL� argumentst�^PLjava/util/List;L� functionNamet�5Lorg/opensearch/sql/expression/function/FunctionName;xpsr�=`shaded.com.google.common.collect.ImmutableList$SerializedForm`��������^B�^A[�elementst�^S[Ljava/lang/Object;xpur�^S[Ljava.lang.Object;X^Ps)l^B��xp���^Csr�1org.opensearch.sql.expression.ReferenceExpressionD\^R^G^B�^DL�^Dattrq�~�^DL�^Epathsq�~�^FL�^GrawPathq�~�^DL�^Dtypet�'Lorg/opensearch/sql/data/type/ExprType;xpt�^Eemailsr�^Zjava.util.Arrays$ArrayList٤<͈^F^B�^A[�^Aaq�~� +xpur�^S[Ljava.lang.String;V^]{G^B��xp���^Aq�~�^Qq�~�^Qsr�:org.opensearch.sql.opensearch.data.type.OpenSearchTextType^D1D^B�^AL�^Ffieldst�^OLjava/util/Map;xr�:org.opensearch.sql.opensearch.data.type.OpenSearchDataTypec^B^E5^B�^CL� exprCoreTypet�+Lorg/opensearch/sql/data/type/ExprCoreType;L� mappingTypet�HLorg/opensearch/sql/opensearch/data/type/OpenSearchDataType$MappingType;L� +propertiesq�~�^Wxp~r�)org.opensearch.sql.data.type.ExprCoreType��������^R��xr�^Njava.lang.Enum��������^R��xpt�^GUNKNOWN~r�Forg.opensearch.sql.opensearch.data.type.OpenSearchDataType$MappingType��������^R��xq�~�^]t�^DTextsr�<*`shaded.com.google.common.collect.ImmutableMap$SerializedForm`*��������^B�^BL�^Dkeyst�^RLjava/lang/Object;L�^Fvaluesq�~�$xpuq�~� ����uq�~� ����sr�^Qjava.util.CollSerW:^[^Q^C�^AI�^Ctagxp���^Cw^D���^Bt�^Gkeywordsq�~�^X~q�~�^\t�^FSTRING~q�~� t�^GKeywordq�~�%xsr�/org.opensearch.sql.expression.LiteralExpressionEB-ǂ$^B�^AL� 
exprValuet�)Lorg/opensearch/sql/data/model/ExprValue;xpsr�-org.opensearch.sql.data.model.ExprStringValue�A2%s^N^S^B�^AL�^Evalueq�~�^Dxr�/org.opensearch.sql.data.model.AbstractExprValuekv^F^TD^B��xpt��sq�~�0sq�~�3t�^Npatterns_fieldsr�3org.opensearch.sql.expression.function.FunctionName 8Mg^B�^AL� functionNameq�~�^Dxpt�patternsq�~�7q�~�9q�~�2q�~�^P�p + + +And without shadow jar + +�^Esr�6org.opensearch.sql.expression.parse.PatternsExpressionΪG^F^B�^BZ�^PuseCustomPatternL�^Gpatternt�^YLjava/util/regex/Pattern;xr�3org.opensearch.sql.expression.parse.ParseExpression} ^B�^DL� +identifiert�*Lorg/opensearch/sql/expression/Expression;L� +identifierStrt�^RLjava/lang/String;L�^Gpatternq�~�^CL� sourceFieldq�~�^Cxr�0org.opensearch.sql.expression.FunctionExpression*0uj{^B�^BL� argumentst�^PLjava/util/List;L� functionNamet�5Lorg/opensearch/sql/expression/function/FunctionName;xpsr�6*`com.google.common.collect.ImmutableList$SerializedForm`*��������^B�^A[�elementst�^S[Ljava/lang/Object;xpur�^S[Ljava.lang.Object;X^Ps)l^B��xp���^Csr�1org.opensearch.sql.expression.ReferenceExpressionD\^R^G^B�^DL�^Dattrq�~�^DL�^Epathsq�~�^FL�^GrawPathq�~�^DL�^Dtypet�'Lorg/opensearch/sql/data/type/ExprType;xpt�^Eemailsr�^Zjava.util.Arrays$ArrayList٤<͈^F^B�^A[�^Aaq�~� +xpur�^S[Ljava.lang.String;V^]{G^B��xp���^Aq�~�^Qq�~�^Qsr�:org.opensearch.sql.opensearch.data.type.OpenSearchTextType^D1D^B�^AL�^Ffieldst�^OLjava/util/Map;xr�:org.opensearch.sql.opensearch.data.type.OpenSearchDataTypec^B^E5^B�^CL� exprCoreTypet�+Lorg/opensearch/sql/data/type/ExprCoreType;L� mappingTypet�HLorg/opensearch/sql/opensearch/data/type/OpenSearchDataType$MappingType;L� +propertiesq�~�^Wxp~r�)org.opensearch.sql.data.type.ExprCoreType��������^R��xr�^Njava.lang.Enum��������^R��xpt�^GUNKNOWN~r�Forg.opensearch.sql.opensearch.data.type.OpenSearchDataType$MappingType��������^R��xq�~�^]t�^DTextsr�5*`com.google.common.collect.ImmutableMap$SerializedForm`*��������^B�^BL�^Dkeyst�^RLjava/lang/Object;L�^Fvaluesq�~�$xpuq�~� ����uq�~� 
����sr�^Qjava.util.CollSerW:^[^Q^C�^AI�^Ctagxp���^Cw^D���^Bt�^Gkeywordsq�~�^X~q�~�^\t�^FSTRING~q�~� t�^GKeywordq�~�%xsr�/org.opensearch.sql.expression.LiteralExpressionEB-ǂ$^B�^AL� exprValuet�)Lorg/opensearch/sql/data/model/ExprValue;xpsr�-org.opensearch.sql.data.model.ExprStringValue�A2%s^N^S^B�^AL�^Evalueq�~�^Dxr�/org.opensearch.sql.data.model.AbstractExprValuekv^F^TD^B��xpt��sq�~�0sq�~�3t�^Npatterns_fieldsr�3org.opensearch.sql.expression.function.FunctionName 8Mg^B�^AL� functionNameq�~�^Dxpt�patternsq�~�7q�~�9q�~�2q�~�^P�p + +You could see the com.google.common.collect.ImmutableList$SerializedForm path changed. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4954: [Backport 2.19-dev] [DOC] Show backticks in testing-doctest.md + +**URL:** https://github.com/opensearch-project/sql/pull/4954 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-12T03:56:22Z + +**State:** MERGED + +**Merged:** 2025-12-12T05:55:56Z + +**Changes:** +6 -6 (1 files) + + +## Description + +Backport c527afc0f0e7868068b69a05c521cf1ee7c70334 from #4941. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4953: [Backport 2.19-dev] [DOC] Callout the aggregation result may be approximate + +**URL:** https://github.com/opensearch-project/sql/pull/4953 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-12T03:55:44Z + +**State:** MERGED + +**Merged:** 2025-12-12T05:55:49Z + +**Changes:** +58 -1 (3 files) + + +## Description + +Backport 90ee47c6f909d38f5ba12cef3c2bda8c5f23cce5 from #4922. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4951: [Backport 2.19-dev] RexCall and RelDataType standardization for script push down (#4914) + +**URL:** https://github.com/opensearch-project/sql/pull/4951 + +**Author:** @qianheng-aws + +**Created:** 2025-12-12T03:05:00Z + +**State:** MERGED + +**Merged:** 2025-12-12T05:56:18Z + +**Changes:** +303 -79 (43 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/bcfcd002d5ec402f257a92f9689097d1c9bf8979 from https://github.com/opensearch-project/sql/pull/4914. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4950: [DOC] PPL docs website exporter script + +**URL:** https://github.com/opensearch-project/sql/pull/4950 + +**Author:** @kylehounslow + +**Created:** 2025-12-12T01:25:36Z + +**State:** MERGED + +**Merged:** 2025-12-15T21:30:17Z + +**Changes:** +273 -76 (7 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Add an automated exporter script that transforms PPL markdown documentation tree (`docs/user/ppl/**/*.md`) into Jekyll-compatible format for the OpenSearch documentation website. + +#### Summary of changes +* Add PPL docs exporter script +* Minor nit fixes to existing markdown docs (discovered when rendering local build of docs website) + +#### Demo +Screen recording of rendered Jekyll site after exporting PPL docs. No manual changes applied. 
**See the files exported to documentation-website for below demo [here](https://github.com/kylehounslow/documentation-website/tree/381a7a0d733cd43bfc98d6cef748775a09ecc10e/_sql-and-ppl/ppl-reference)** + +https://github.com/user-attachments/assets/07fdf522-5b42-4b1f-a993-ab8dd7798f6c + + + + +#### Exporter script features +- **Jekyll front-matter generation**: Auto-injects layout, title, parent/grand_parent hierarchy, and navigation order +- **link resolution**: Handles all relative link patterns (../, ./, same-directory, subdirectory) +- **Jekyll anchor normalization**: Converts anchors to match Jekyll's format (removes dots/dashes) +- **Deep directory rollup**: Flattens 3+ level directories with automatic redirect_from for original paths. **This is a workaround for a limitation in `just-the-docs` theme on docs website**. +- **Content transformations**: + - Converts PPL code fences to SQL syntax highlighting + - Adds copy buttons to code blocks +- **Directory heading mappings**: Consistent navigation titles (e.g., cmd → "Commands") + +### Related Issues +* https://github.com/opensearch-project/sql/issues/4854 +* https://github.com/opensearch-project/sql/pull/4912 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +lgtm, could probably find maintainability nits if I tried but since this is a one-off script we probably don't need to worry about it + + +## Review Comments + + +### @kylehounslow on `scripts/docs_exporter/export_to_docs_website.py:146` + + +This is incorrect. Without global .md replacement, it would miss anchor links (headings). 
Example from [ppl/cmd/grok.md](https://github.com/opensearch-project/sql/blob/4bf5c9c776e7f8cb11714d68fbc2c9163475ef23/docs/user/ppl/cmd/grok.md?plain=1#L86): + +```` +## Limitations +The grok command has the same limitations as the parse command, see [parse limitations](./parse.md#Limitations) for details. +```` + + +### @Swiddis on `scripts/docs_exporter/export_to_docs_website.py:301` + + +non-portable paths, but if i understand right this only runs in CI servers or in a box where we control the platform + + +### @penghuo on `docs/user/ppl/index.md:14` + + +when should add ppl ignore? + + +### @kylehounslow on `docs/user/ppl/index.md:14` + + +The `ignore` keyword will [omit the code block from doctests](https://github.com/opensearch-project/sql/blob/ccdb0f77454fa2fcba33540b87e934ec89a25b67/doctest/markdown_parser.py#L139-L140). For this case, we want to exclude from doctests but still want `ppl` syntax highlighting applied. +Note: All `ppl` blocks are [converted to `sql` formatting](https://github.com/kylehounslow/sql/blob/3354c91512f0369b58bf80ffb72e084870308394/scripts/docs_exporter/export_to_docs_website.py#L151) on export to docs website (`sql` is the closest format supported for syntax highlighting). + + +### @kylehounslow on `scripts/docs_exporter/export_to_docs_website.py:301` + + +Uses `pathlib.Path` object with `/` operator ([src](https://github.com/python/cpython/blob/27a2e49d1849751008ea5807558129e11d35fb7a/Lib/pathlib/__init__.py#L176)). Should be portable to all OS + + +### @Swiddis on `scripts/docs_exporter/export_to_docs_website.py:301` + + +Ah, I didn't realize the slash operator carried to the trailing strings. I thought we'd end up with `{script_dir}\../...`. 
TIL + + +## General Comments + + +### @kylehounslow + + +@coderabbitai review + + +--- + +# PR #4946: [Backport 3.4] Update 3.4 release note + +**URL:** https://github.com/opensearch-project/sql/pull/4946 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-11T17:33:13Z + +**State:** MERGED + +**Merged:** 2025-12-11T17:55:49Z + +**Changes:** +8 -8 (1 files) + + +## Description + +Backport fc3a9355a84d33db2d30be72c7a2b000de7787f8 from #4939. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4945: [Backport main] Update 3.4 release note + +**URL:** https://github.com/opensearch-project/sql/pull/4945 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-11T17:33:01Z + +**State:** MERGED + +**Merged:** 2025-12-11T17:55:45Z + +**Changes:** +8 -8 (1 files) + + +## Description + +Backport fc3a9355a84d33db2d30be72c7a2b000de7787f8 from #4939. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4942: [Backport 2.19-dev] Replace duplicated aggregation logic with `aggregateWithTrimming()` + +**URL:** https://github.com/opensearch-project/sql/pull/4942 + +**Author:** @ishaoxy + +**Created:** 2025-12-11T07:56:50Z + +**State:** MERGED + +**Merged:** 2025-12-11T08:28:44Z + +**Changes:** +65 -82 (10 files) + + +## Description + +### Description +backport #4926 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4941: [DOC] Show backticks in testing-doctest.md + +**URL:** https://github.com/opensearch-project/sql/pull/4941 + +**Author:** @LantaoJin + +**Created:** 2025-12-11T05:15:36Z + +**State:** MERGED + +**Merged:** 2025-12-11T08:32:03Z + +**Changes:** +6 -6 (1 files) + +**Labels:** `documentation`, `backport 2.19-dev` + + +## Description + +### Description +Show backticks in testing-doctest.md + +Before: +Screenshot 2025-12-11 at 1 14 09 PM + +After: +Screenshot 2025-12-11 at 1 14 56 PM + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +ping @kylehounslow + + +--- + +# PR #4939: Update 3.4 release note + +**URL:** https://github.com/opensearch-project/sql/pull/4939 + +**Author:** @ahkcs + +**Created:** 2025-12-10T23:31:09Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:26:09Z + +**Changes:** +8 -8 (1 files) + +**Labels:** `backport main`, `backport 3.4` + + +## Description + +### Description +3.4 release note update + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4938: [Backport 2.19-dev] backport markdown doctest support + +**URL:** https://github.com/opensearch-project/sql/pull/4938 + +**Author:** @kylehounslow + +**Created:** 2025-12-10T23:12:37Z + +**State:** MERGED + +**Merged:** 2025-12-11T18:44:20Z + +**Changes:** +19030 -17651 (150 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Backport markdown doctest support from https://github.com/opensearch-project/sql/pull/4912 to `2.19-dev` branch + +### Related Issues +Resolves merge conflicts blocking bot from creating auto backport PRs. + +### Check List + - [n/a] New functionality has javadoc added. +- [n/a] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [n/a] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4937: [Backport 2.19-dev] Enhance doc and error message handling for `bins` on time-related fields + +**URL:** https://github.com/opensearch-project/sql/pull/4937 + +**Author:** @ahkcs + +**Created:** 2025-12-10T21:34:10Z + +**State:** MERGED + +**Merged:** 2025-12-15T16:28:26Z + +**Changes:** +522 -1 (5 files) + + +## Description + +Enhance doc and error message handling for `bins` on time-related field (#4713) + +(cherry picked from commit ef4c51e0e15e6d8e5385ea3605c536775396fc39) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4936: [Backport 2.19-dev] Time Unit Unification for bin/stats (#4450) + +**URL:** https://github.com/opensearch-project/sql/pull/4936 + +**Author:** @ahkcs + +**Created:** 2025-12-10T20:05:12Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:22:11Z + +**Changes:** +599 -60 (5 files) + +**Labels:** `maintenance` + + +## Description + +(cherry picked from commit 5bb274740685a57d1798e70ab43f6859e3d7ee81) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4935: [Backport 3.4] Update 3.4 release Note (#4923) + +**URL:** https://github.com/opensearch-project/sql/pull/4935 + +**Author:** @ahkcs + +**Created:** 2025-12-10T18:56:39Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:29:58Z + +**Changes:** +157 -2 (2 files) + 
+**Labels:** `skip-changelog` + + +## Description + +Update 3.4 doc +(cherry picked from commit c87f99f5554e3e7edba2855bc2d0f219f4506c0d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4934: [Backport main] Update 3.4 release Note (#4923) + +**URL:** https://github.com/opensearch-project/sql/pull/4934 + +**Author:** @ahkcs + +**Created:** 2025-12-10T18:51:20Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:29:31Z + +**Changes:** +155 -0 (1 files) + +**Labels:** `skip-changelog` + + +## Description + +Update 3.4 doc +(cherry picked from commit c87f99f5554e3e7edba2855bc2d0f219f4506c0d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4933: Extract unified query context for shared config management + +**URL:** https://github.com/opensearch-project/sql/pull/4933 + +**Author:** @dai-chen + +**Created:** 2025-12-10T17:32:25Z + +**State:** MERGED + +**Merged:** 2025-12-17T22:54:26Z + +**Changes:** +343 -215 (7 files) + +**Labels:** `bug`, `maintenance`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces `UnifiedQueryContext`, a reusable abstraction shared across unified query components (parser, planner, compiler, etc.). It centralizes configuration by constructing and bundling `CalcitePlanContext` and `Settings` into a single object. As a result, all unified query components can now read required configuration explicitly, resolving the configuration propagation issue tracked https://github.com/opensearch-project/sql/issues/4910. + +### Related Issues + +Resolves https://github.com/opensearch-project/sql/issues/4910. 
+ +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `api/README.md:None` + + +High-level Q: What session do you mean? I mean, how to define a session start and close? I didn't see any session management code at a glance. + + +### @dai-chen on `api/README.md:None` + + +I was thinking about session in SQL/Spark. Since we've not clearly defined it, let me remove it to avoid confusion. Thanks! + + +### @dai-chen on `api/README.md:None` + + +Addressed in https://github.com/opensearch-project/sql/pull/4933/changes/f650fb5fc90150d973cd4ac0c6adbbf984792ef6. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4932: Add feedback reminder for CodeRabbit + +**URL:** https://github.com/opensearch-project/sql/pull/4932 + +**Author:** @ykmr1224 + +**Created:** 2025-12-10T17:24:44Z + +**State:** MERGED + +**Merged:** 2025-12-11T22:01:28Z + +**Changes:** +28 -0 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Add feedback reminder for CodeRabbit + - When CodeRabbit made review comment, it automatically adds reminder message to leave feedback. `👋 Leave emoji reaction (👍/👎) to track effectiveness of CodeRabbit.` + - Later we can take stats on how many comments got positive/negative feedback via Github API, and make improvement based on the result. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Just wonder is this doable in Coderabbit config? Just feel this workflow is very specific for this small task. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @dai-chen + + +@ykmr1224 Also wondering is this knowledge base feature available to us? Instead of manual analysis, can we give feedback to Coderabbit directly? https://docs.coderabbit.ai/integrations/knowledge-base + + +### @ykmr1224 + + +> Just wonder is this doable in Coderabbit config? Just feel this workflow is very specific for this small task. + +Coderabbit don't have configuration for this kind of customization so far. + +> @ykmr1224 Also wondering is this knowledge base feature available to us? Instead of manual analysis, can we give feedback to Coderabbit directly? https://docs.coderabbit.ai/integrations/knowledge-base + +It is enabled and doable, but that relies on each developer giving appropriate feedback. +We should track overall effectiveness and improve that continuously. + + +### @dai-chen + + +> > Just wonder is this doable in Coderabbit config? Just feel this workflow is very specific for this small task. +> +> Coderabbit don't have configuration for this kind of customization so far. +> +> > @ykmr1224 Also wondering is this knowledge base feature available to us? Instead of manual analysis, can we give feedback to Coderabbit directly? https://docs.coderabbit.ai/integrations/knowledge-base +> +> It is enabled and doable, but that relies on each developer giving appropriate feedback. We should track overall effectiveness and improve that continuously. + +I agree. Either the knowledge base can separate team and personal preference, or we give permission to a small group. 
We can discuss offline. I'm just thinking we can maintain a single source of coding guidance, team knowledge between local Dev agent and this Review agent. Thanks! cc: @penghuo + + +--- + +# PR #4930: [Backport 2.19-dev] Support composite aggregation paginating (#4884) + +**URL:** https://github.com/opensearch-project/sql/pull/4930 + +**Author:** @LantaoJin + +**Created:** 2025-12-10T10:01:39Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:38:44Z + +**Changes:** +1121 -532 (190 files) + + +## Description + +(cherry picked from #4884 commit 9930665c372f433eea4aeb04b5a4cfcd51be3e9e) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4929: Pushdown join with `max=n` option to TopHits aggregation + +**URL:** https://github.com/opensearch-project/sql/pull/4929 + +**Author:** @LantaoJin + +**Created:** 2025-12-10T09:09:26Z + +**State:** MERGED + +**Merged:** 2025-12-12T06:36:57Z + +**Changes:** +170 -132 (27 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Pushdown join with `max=n` option to TopHits aggregation: +- The right side subsearch with `max=n` will be converted to TopHits aggregation. +- For inner join, the `SortMergeJoin` may be converted to `HashJoin` by reordering the sides of join +- For non-inner join, the right side will be fully pushed down to DSL, rather than executing `WindowFunction` in memory. + +### Related Issues +Resolves #4927 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Remove it? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +sure. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +The behavior of system limit has changed from `limitation of source` to `limitation of the results after top hits`. + +So if we cannot push down the window, it will scan all rows from the source. @LantaoJin + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +I corrected the behavior since the define of `plugins.ppl.join.subsearch_maxout` is + +> The size configures the maximum of rows from subsearch to join against. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +Can we ensure the window will always be pushed down? Otherwise it will get regression than before? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +No if (1) the join keys contain text (not keyword); (2) the join keys contain expression (I am working on support #4789 which could resolve it) + + +## General Comments + + +### @LantaoJin + + +> core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java (1)
+> +> `1323-1374`: **Address the gap between documentation and SEMI/ANTI join implementation.** +> +> The documentation allows `max` option with `semi` and `anti` join types, but the code returns early for SEMI/ANTI joins (lines 1328-1332) before processing the `max` option and applying dedup/limit optimizations. This creates an inconsistency: users can specify `max` with SEMI/ANTI joins per the documented syntax, but it will be silently ignored. +> +> Either remove the early return to enable `max` support for SEMI/ANTI joins, or add a validation error when `max` is specified with SEMI/ANTI, and update documentation to clarify the limitation. + +@coderabbitai the `max` option takes no effect for `semi` and `anti` join types because `semi` and `anti` joins just use left side to filter the records in left side. So the join results in whatever max=1, max=2 or max=∞ are totally same. + + +--- + +# PR #4928: [Backport 2.19-dev] Support sort expression pushdown for SortMergeJoin(#4830) + +**URL:** https://github.com/opensearch-project/sql/pull/4928 + +**Author:** @songkant-aws + +**Created:** 2025-12-10T08:37:30Z + +**State:** MERGED + +**Merged:** 2025-12-11T07:35:34Z + +**Changes:** +437 -133 (17 files) + + +## Description + +### Description +Backport #4830 to 2.19-dev + +### Related Issues + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4926: Replace duplicated aggregation logic with `aggregateWithTrimming()` + +**URL:** https://github.com/opensearch-project/sql/pull/4926 + +**Author:** @ishaoxy + +**Created:** 2025-12-10T06:36:20Z + +**State:** MERGED + +**Merged:** 2025-12-11T07:31:01Z + +**Changes:** +65 -76 (10 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description + Just use `aggregateWithTrimming()` to avoid duplicating existing functionality. + +### Related Issues +Resolves #4925 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1833` + + +@yuancu @songkant-aws we'd better not call `context.relBuilder.aggregate` directly, instead, use `aggregateWithTrimming` to build aggregation in stack. I see some code in `visitChart`, `rankByColumnSplit` and `visitPatterns`. Can you create an issue to replace them? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1833` + + +Or co-author with @ishaoxy to address them in this PR. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1833` + + +I did so because I needed to build multiple aggregations for the chart command. 
`aggregateWithTrimming` works on PPL's AST (composed of `UnresolvedPlan`) and creates RexNode AST, but I have already gone through this in the first aggregation. Therefore, I had to call `relBuilder.aggregate` the second time I created an aggregation to aggregate on RexNode. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4924: Remove all AccessController refs + +**URL:** https://github.com/opensearch-project/sql/pull/4924 + +**Author:** @Swiddis + +**Created:** 2025-12-09T20:27:42Z + +**State:** MERGED + +**Merged:** 2025-12-10T17:11:58Z + +**Changes:** +377 -581 (25 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Removes all uses of AccessController, which is deprecated since version 3.0. Finishing what #4900 started. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java:138` + + +@coderabbitai can you commit this? 
+ + +### @Swiddis on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/ExecuteDirectQueryActionResponse.java:112` + + +Will skip this, it's pre-existing behavior and I'm not sure what the impact would be to change it + +Long-term: use a report wrapper #4919 + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/security/SecurityAccess.java:None` + + +Will remove this class entirely since it's just another doPrivileged wrapper + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4923: Update 3.4 release Note + +**URL:** https://github.com/opensearch-project/sql/pull/4923 + +**Author:** @ahkcs + +**Created:** 2025-12-09T17:44:53Z + +**State:** MERGED + +**Merged:** 2025-12-09T21:16:26Z + +**Changes:** +92 -10 (2 files) + +**Labels:** `PPL`, `backport main`, `backport-failed`, `skip-changelog`, `backport 3.4` + + +## Description + +### Description +Update 3.4 release Note + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:9` + + +Keep the message consistent with other features. +Change it to support `replace` + + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:48` + + +Shall we add all new functions under the features section? 
+ + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:26` + + +Should be under features sections + + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:27` + + +All new eval functions to be added under the feature section + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:9` + + +Updated + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:48` + + +Moved eval functions to features section + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:26` + + +Updated + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:27` + + +Moved eval functions to features section + + +## General Comments + + +### @ahkcs + + +Resolving the comments in a new PR, which will also be backported: +https://github.com/opensearch-project/sql/pull/4939 + + +--- + +# PR #4922: [DOC] Callout the aggregation result may be approximate + +**URL:** https://github.com/opensearch-project/sql/pull/4922 + +**Author:** @LantaoJin + +**Created:** 2025-12-09T09:43:15Z + +**State:** MERGED + +**Merged:** 2025-12-11T08:34:18Z + +**Changes:** +58 -1 (3 files) + +**Labels:** `documentation`, `backport 2.19-dev` + + +## Description + +### Description +[DOC] Callout the aggregation result may be approximate + +### Related Issues +Resolves #4915 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @kylehounslow on `docs/user/ppl/cmd/stats.md:None` + + +Thanks for keeping our docs fresh! + +Note: If no output is provided, doctests might fail. Need to use "ignore" to omit from doctest. E.g. 
+```` +```ppl ignore +source=hits +| stats bucket_nullable=false count() as c by URL +| sort - c +| head 10 +``` +```` + +Else provide the expected output in a codeblock following the `ppl` block. Example: +```` +```ppl +search source=accounts | where age > 25 | fields firstname, lastname +``` + +Expected output: + +```text ++-------------+------------+ +| firstname | lastname | +|-------------+------------| +| Amber | Duke | +| Hattie | Bond | ++-------------+------------+ +``` +```` + + +See https://github.com/opensearch-project/sql/blob/5f963a0a0ae29e20d84306d3423daf104cddeb42/docs/dev/testing-doctest.md#markdown-format-new---currently-for-docsuserppl-only + + +### @LantaoJin on `docs/user/ppl/cmd/stats.md:None` + + +Thanks! `ignore` added. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4920: [2.19-dev] Disable Calcite by default + +**URL:** https://github.com/opensearch-project/sql/pull/4920 + +**Author:** @LantaoJin + +**Created:** 2025-12-09T03:12:30Z + +**State:** MERGED + +**Merged:** 2025-12-09T06:50:34Z + +**Changes:** +93 -1 (8 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Disable Calcite by default in 2.19-dev + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4918: [Backport 2.19-dev] Support `split` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4918 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-08T21:48:13Z + +**State:** MERGED + +**Merged:** 2025-12-10T23:27:05Z + +**Changes:** +218 -0 (8 files) + + +## Description + +Backport 5dca84f73315aafee61878c279bc7ba904c18be1 from #4814. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4917: [Backport 2.19-dev] Add unified query transpiler API + +**URL:** https://github.com/opensearch-project/sql/pull/4917 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-08T20:02:24Z + +**State:** MERGED + +**Merged:** 2025-12-08T21:22:15Z + +**Changes:** +238 -39 (8 files) + + +## Description + +Backport d4daa34d83130429f44f14ecd703e070782c13c7 from #4871. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4916: [Backport 2.19-dev] Implement one-batch lookahead for index enumerators (#4345) + +**URL:** https://github.com/opensearch-project/sql/pull/4916 + +**Author:** @Swiddis + +**Created:** 2025-12-08T17:46:50Z + +**State:** MERGED + +**Merged:** 2025-12-10T09:36:39Z + +**Changes:** +468 -74 (21 files) + + +## Description + +### Description +Backport #4345 as there's no perf regression in OSB. In general, I'm confident that this helps it many query cases and doesn't overall hurt the general case. + +Had to convert the Scanner away from record types because of JDK compatibility, should be no functionality diff. 
+ +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `prometheus/src/test/java/org/opensearch/sql/prometheus/storage/PrometheusStorageFactoryTest.java:133` + + +test.com just dropped out of DNS, leading to URI validation failures. Quick fix. + + +## General Comments + + +### @LantaoJin + + +@Swiddis I still see the compile errors in CI + + +--- + +# PR #4914: RexCall and RelDataType standardization for script push down + +**URL:** https://github.com/opensearch-project/sql/pull/4914 + +**Author:** @qianheng-aws + +**Created:** 2025-12-08T03:56:22Z + +**State:** MERGED + +**Merged:** 2025-12-12T02:48:16Z + +**Changes:** +303 -79 (43 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +This PR continues https://github.com/opensearch-project/sql/pull/4795 work, it includes change: +1. Implement RexCall standardization by using `RexNormalize.normalize` +2. Implement RelDataType standardization by widening the type, see details in `RexStandardizer::widenType` +3. Support Sarg literal value by expanding `SEARCH` to conjunction expressions. +4. Support decimal literal value by downgrading to double value. + + +### Related Issues +Resolves #4757 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:109` + + +`helper.stack` here is an `ArrayDeque`, while used as a stack + + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +can you add a IT to verify the generated script with skip encoding via format=extended. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +We have test `testSkipScriptEncodingOnExtendedFormat`. Do you mean set `format=extended` for `agg_case_cannot_push`? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +Added in test case `testRexStandardizationForScript` + +The script is formatted below, with several changes: +- `>=` -> `<=` +- SEARCH -> AND(...) 
+- INT nullable=false -> BIGINT nullable=true +- CHAR[xxx] nullable=false -> VARCHAR nullable=true + +``` +{ + "op": { + "name": "CASE", + "kind": "CASE", + "syntax": "SPECIAL" + }, + "operands": [ + { + "op": { + "name": "<", + "kind": "LESS_THAN", + "syntax": "BINARY" + }, + "operands": [ + { + "dynamicParam": 0, + "type": { + "type": "BIGINT", + "nullable": true + } + }, + { + "dynamicParam": 1, + "type": { + "type": "BIGINT", + "nullable": true + } + } + ] + }, + { + "dynamicParam": 2, + "type": { + "type": "VARCHAR", + "nullable": true, + "precision": -1 + } + }, + { + "op": { + "name": "AND", + "kind": "AND", + "syntax": "BINARY" + }, + "operands": [ + { + "op": { + "name": "<=", + "kind": "LESS_THAN_OR_EQUAL", + "syntax": "BINARY" + }, + "operands": [ + { + "dynamicParam": 3, + "type": { + "type": "BIGINT", + "nullable": true + } + }, + { + "dynamicParam": 4, + "type": { + "type": "BIGINT", + "nullable": true + } + } + ] + }, + { + "op": { + "name": "<=", + "kind": "LESS_THAN_OR_EQUAL", + "syntax": "BINARY" + }, + "operands": [ + { + "dynamicParam": 5, + "type": { + "type": "BIGINT", + "nullable": true + } + }, + { + "dynamicParam": 6, + "type": { + "type": "BIGINT", + "nullable": true + } + } + ] + } + ] + }, + { + "dynamicParam": 7, + "type": { + "type": "VARCHAR", + "nullable": true, + "precision": -1 + } + }, + { + "dynamicParam": 8, + "type": { + "type": "VARCHAR", + "nullable": true, + "precision": -1 + } + } + ] +} +``` + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:2136` + + +We should avoid reusing `FORMAT` parameter for `EXPLAIN OPTION` and add a new parameter for it. `EXPLAIN OPTION` affects the content of this API while `FORMAT` only affects the format like JSON, CSV, YAML. + +@coderabbitai Could you create an issue to track this? 
+ + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:67` + + +I encountered a similar problem -- `Sarg` can be serialized but can not be properly deserialized. I addressed it by replacing `Sarg sarg = sargFromJson((Map) literal)` with `Sarg sarg = sargFromJson((Map) literal, type)` in `ExtendedRelJson.java`. Maybe it helps. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:67` + + +The problem I met here is I cannot translate a Sarg literal into a java object by using `RexToLixTranslator.translateLiteral`. Thus, we cannot generate a parameter for it. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:67` + + +From the implementation of `RexToLixTranslator `, it seems Sarg can only be used as literal in RexNode expression but not DynamicParams. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4912: Migrate PPL Documentation from RST to Markdown + +**URL:** https://github.com/opensearch-project/sql/pull/4912 + +**Author:** @kylehounslow + +**Created:** 2025-12-06T00:36:33Z + +**State:** MERGED + +**Merged:** 2025-12-09T21:52:43Z + +**Changes:** +20383 -16782 (157 files) + +**Labels:** `maintenance` + + +## Description + +## Description + +This PR converts all PPL documentation under `docs/user/ppl/` from reStructuredText to Markdown format, enabling automated export to the main [OpenSearch +documentation website](https://docs.opensearch.org/latest/about/) at [opensearch-project/documentation-website](https://github.com/opensearch-project/documentation-website). + +**Important Note: All existing doctest coverage has been migrated successfully and all existing GitHub-based documentation remains intact and fully functional. 
This change enables PPL documentation to appear on the main OpenSearch docs site while preserving the existing GitHub-based documentation experience. See demo below.** + +### Live Demo: +* GitHub docs: https://github.com/kylehounslow/sql/blob/feat/markdown-doctests/docs/user/ppl/index.md +* Main docs build: + +### Why? +* Enables automatic export of PPL docs to main OpenSearch documentation site. Currently changes are made via manual copy/paste resulting in stale, inconsistent docs. +* Improves discoverability of new/existing PPL commands and functionality. +* Improves developer experience with clean, copy-able PPL code snippets. + +### Related Issues +* https://github.com/opensearch-project/sql/issues/4854 +* https://github.com/opensearch-project/documentation-website/pull/11621 + +### Summary of Changes + +#### Documentation Format Migration +* Converted 70+ RST files to Markdown across all PPL documentation sections +* Updated `docs/category.json` to reflect new file structure +* Removed shell prefixes and output from code blocks for clean copy-paste +* **GitHub documentation experience unchanged** - same content, same navigation, rendered by GitHub's native Markdown support + +#### Doctest Changes +* Added `markdown_parser.py` to support doctest execution on Markdown code blocks +* Extended existing doctest framework to handle both RST and Markdown formats +* All existing tests pass with new parser + +#### Export Tooling +* `export_to_docs_website.py` - Jekyll-compatible export to inject proper front-matter, etc while preserving exact docs structure from `docs/user/ppl`. + * Note: main docs page has already promoted SQL and PPL section to top-level to accommodate this: https://github.com/opensearch-project/documentation-website/pull/11621 +* Conversion scripts (only run once. 
Kept for reference/re-use on remaining sql docs): + * `convert_rst_to_md.py` - Automated RST to Markdown conversion + * `fix_markdown_formatting.py` - Post-conversion cleanup and standardization to ensure proper Jekyll rendering + +### Future PR +* Migrate remaining `docs/user/sql` to markdown. + +### Check List + - [n/a] New functionality has javadoc added. + - [n/a] New functionality has a user manual doc added. +- [n/a] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +UT failure due to test.com is down +Fix: https://github.com/opensearch-project/sql/pull/4916/commits/afcfbd951d4b387106047c363f770fc43ad021c2 + + +--- + +# PR #4908: [Backport 2.19-dev] Error handling for dot-containing field names + +**URL:** https://github.com/opensearch-project/sql/pull/4908 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-05T00:41:37Z + +**State:** MERGED + +**Merged:** 2025-12-05T05:56:45Z + +**Changes:** +384 -7 (4 files) + + +## Description + +Backport 8126367a787b121e4467e3467d9f158a421290c0 from #4907. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4907: Error handling for dot-containing field names + +**URL:** https://github.com/opensearch-project/sql/pull/4907 + +**Author:** @ahkcs + +**Created:** 2025-12-04T17:57:22Z + +**State:** MERGED + +**Merged:** 2025-12-05T00:12:19Z + +**Changes:** +384 -7 (4 files) + +**Labels:** `PPL`, `backport 2.19-dev`, `bugFix` + + +## Description + +

Summary

+

Resolves #4896: ArrayIndexOutOfBoundsException when querying an index containing malformed field names (e.g., ".", "..", ".a", "a.", "a..b") inside disabled object fields.

+

Disabled objects ("enabled": false) bypass field-name validation, allowing malformed names to be indexed and subsequently causing crashes in the SQL/PPL engines.

+
+

Root Cause

+

OpenSearchExprValueFactory.JsonPath constructs field paths using:

+
rawPath.split("\\.");
+
+

For malformed field names, split("\\.") behaves unexpectedly:

+ +Field Name | Result of split() | Issue +-- | -- | -- +".", ".." | [] (empty array) | dot-only → paths.get(0) crashes +".a" | ["", "a"] | leading dot → empty path segment +"a." | ["a"] (trailing empty removed) | trailing dot silently lost +"a..b" | ["a", "", "b"] | consecutive dots → empty segment + + + + + + + +## Summary by CodeRabbit + +## Release Notes + +* **Bug Fixes** + * Improved handling of queries on object fields containing malformed field names (dot-only names, leading/trailing dots, or consecutive dots). Invalid fields now return null while valid fields remain accessible. + +* **Documentation** + * Added documentation describing limitations when querying object fields with malformed field names and recommendations to avoid problematic naming patterns. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/limitations/limitations.rst:None` + + +> disabled object field, + +remove disabled. + +> PPL queries will return ``null`` for those specific fields. + +PPL ignore malformed fieldname. + + +### @penghuo on `docs/user/ppl/limitations/limitations.rst:None` + + +malformed names field are ignored. 
+ + +### @ahkcs on `docs/user/ppl/limitations/limitations.rst:None` + + +Updated + + +### @ahkcs on `docs/user/ppl/limitations/limitations.rst:None` + + +Updated + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4902: [Backport 2.19-dev] Support timeouts for Calcite queries (#4857) + +**URL:** https://github.com/opensearch-project/sql/pull/4902 + +**Author:** @Swiddis + +**Created:** 2025-12-03T21:42:17Z + +**State:** MERGED + +**Merged:** 2025-12-05T05:57:49Z + +**Changes:** +236 -45 (20 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Backport #4857 to 2.19-dev + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@Swiddis Can you set an appropriate commit message when you enable automatic merging next time? + + +--- + +# PR #4901: Adjust CodeRabbit review config + +**URL:** https://github.com/opensearch-project/sql/pull/4901 + +**Author:** @ykmr1224 + +**Created:** 2025-12-03T21:28:19Z + +**State:** MERGED + +**Merged:** 2025-12-04T17:29:30Z + +**Changes:** +6 -5 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Adjust CodeRabbit review config to enable auto review. +- Some more minor changes. + +### Related Issues +https://github.com/opensearch-project/sql/issues/4889 +https://github.com/opensearch-project/.github/issues/412 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4900: Remove doPrivileged call in Calcite script execution + +**URL:** https://github.com/opensearch-project/sql/pull/4900 + +**Author:** @Swiddis + +**Created:** 2025-12-03T19:35:46Z + +**State:** MERGED + +**Merged:** 2025-12-04T19:31:01Z + +**Changes:** +2 -7 (1 files) + +**Labels:** `maintenance`, `performance`, `calcite` + + +## Description + +### Description +Seems to delete a major perf bottleneck: we create an access controller context for every individual document we process in a script, which involves a lot of stack trace traversal and locking for accessing that context. Curious what the breakage is if I just delete this controller step, since it passes tests locally. At least for the query I was interested in, it reduced the runtime from 70 seconds (2mil documents) to 4. + +Better approach if we need it: move the privilege step to outside the core script loop. + +Measured from slow query (big5 dataset): +``` +source = big5 +| where `agent.name` = 'filebeat' +| where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` <= '2023-01-02 00:00:00' +| where `metrics.size` > 1000 and `metrics.size` != 5000 +| stats count(`agent.name`) +``` + +Before (70s/query): +image + +After (4s/query): +image + + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the changes! If I recall right, this is mostly for action whitelisted in plugin-security.policy? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @songkant-aws + + +Grreat! I remember somehow 2.19 still needs it. But it was removed in OpenSearch since 3.x version. + + +--- + +# PR #4895: [Backport 2.19-dev] [BugFix] Fix Memory Exhaustion for Multiple Filtering Operations in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4895 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-02T21:41:24Z + +**State:** MERGED + +**Merged:** 2025-12-02T23:38:55Z + +**Changes:** +291 -126 (37 files) + + +## Description + +Backport 52fe8aa5bd19e8008e4c11bcbd2ea69946b5724e from #4841. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4891: [Backport 2.19-dev] Add hashCode() and equals() to the value class of ExprJavaType + +**URL:** https://github.com/opensearch-project/sql/pull/4891 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-02T00:46:19Z + +**State:** MERGED + +**Merged:** 2025-12-02T02:28:45Z + +**Changes:** +88 -1 (3 files) + + +## Description + +Backport 96370bfa573831d046db5f5c7029113460cbbb11 from #4885. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4890: Add config for CodeRabbit review + +**URL:** https://github.com/opensearch-project/sql/pull/4890 + +**Author:** @ykmr1224 + +**Created:** 2025-12-01T23:59:46Z + +**State:** MERGED + +**Merged:** 2025-12-02T17:27:18Z + +**Changes:** +185 -0 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Add config files for CodeRabbit review (AI review assistant) + - Requested use of Amazon Q, but there were discussion in LF and they decided to use CodeRabbit. (https://github.com/opensearch-project/.github/issues/412) +- Disable auto review for now (enable once we establish configuration). + +### Related Issues +https://github.com/opensearch-project/sql/issues/4889 +https://github.com/opensearch-project/.github/issues/412 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - APPROVED + + +Hi @ykmr1224, thanks for the change. LGTM. Left a minor question. + + +## Review Comments + + +### @RyanL1997 on `.coderabbit.yaml:22` + + +Nice, I think we can also start exercising this in our PR template when we initially creating PR. + + +### @RyanL1997 on `.rules/REVIEW_GUIDELINES.md:48` + + +Is this rule specific for command development? + + +### @ykmr1224 on `.rules/REVIEW_GUIDELINES.md:48` + + +No, this is generic for any code change in this repository. + + +### @Swiddis on `.coderabbit.yaml:69` + + +Not sure if I like this setting, maybe others like it though? 
+ +Usually when responding to AI comments the only audience is other reviewers (e.g. "this comment doesn't apply" or "implemented"), I'm not sure I see the value of the AI reply + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4885: Add hashCode() and equals() to the value class of ExprJavaType + +**URL:** https://github.com/opensearch-project/sql/pull/4885 + +**Author:** @LantaoJin + +**Created:** 2025-11-30T10:41:26Z + +**State:** MERGED + +**Merged:** 2025-12-02T00:45:36Z + +**Changes:** +88 -1 (3 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +``` +source=web_logs +| stats count() as request_count by client_ip +| join type=inner client_ip ip_geodata +``` +The query above is optimized to `HashJoin` since the left output comes from an aggregation. Unlike `MergeJoin`, the `HashJoin` uses `hashCode()` to get the equivalence key from a HashMap. The value object `ExprIpValue` of `ExprIPType` should override the hashCode() and equals() methods. (If the join is `MergeJoin`, there is no issue since `ExprIpValue` has already implemented `Comparable`. + +### Related Issues +Resolves #4726 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + + +## Summary by CodeRabbit + +* **Improvements** + * Enhanced IP value type handling to ensure proper equality and comparison operations + +* **Tests** + * Added integration test validating IP type operations in hash join queries across multiple indices + +✏️ Tip: You can customize this high-level summary in your review settings. 
+ + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4884: Support composite aggregation paginating + +**URL:** https://github.com/opensearch-project/sql/pull/4884 + +**Author:** @LantaoJin + +**Created:** 2025-11-28T13:42:27Z + +**State:** MERGED + +**Merged:** 2025-12-08T08:45:14Z + +**Changes:** +1120 -503 (190 files) + +**Labels:** `enhancement`, `data-correctness`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `clickbench` + + +## Description + +### Description +Support composite aggregation paginating + +This PR fix the correctness of following clickbench queries: +Q28, Q29: aggregate with having clause +Q42: offset exceeds the bucket size +and other queries such aggregations within join + +Q28: +```SQL +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c +FROM hits +WHERE URL <> '' +GROUP BY CounterID +HAVING COUNT(*) > 100000 +ORDER BY l DESC +LIMIT 25; +``` +In PPL query, `where c > 100000` actually is a HAVING clause: +``` +source=hits +source=hits +| where URL != '' +| stats bucket_nullable=false avg(length(URL)) as l, count() as c by CounterID +| where c > 100000 +| sort - l +| head 25 +``` + +Q42: +```SQL +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews +FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' +AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 +GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +``` + +In PPL query, the from 10000 exceeds the default bucket size +``` +source=hits +| where CounterID = 62 and EventDate >= '2013-07-01 00:00:00' and EventDate <= '2013-07-31 00:00:00' and IsRefresh = 0 and DontCountHits = 0 and URLHash = 2868770270353813622 +| stats bucket_nullable=false count() as PageViews by WindowClientWidth, WindowClientHeight +| 
sort - PageViews +| head 10 from 10000 +``` + +Query aggregations within join: +SQL +```SQL +SELECT +* +FROM ( + SELECT COUNT(*) cnt, DEPTNO + FROM EMP + GROUP BY DEPTNO +) t1 +INNER JOIN ( + SELECT COUNT(*) cnt, DEPTNO + FROM DEPT + GROUP BY DEPTNO +) t2 +ON t1.DEPTNO = t2.DEPTNO +``` +PPL +``` +source=EMP +| stats count() as cnt by DEPTNO +| join type=inner DEPTNO [ + source=DEPT + | stats count() as cnt by DEPTNO +] +``` + +### Related Issues +Resolves #4836 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + + +## Summary by CodeRabbit + +## Release Notes + +* **New Features** + * Added support for HAVING clause optimization in aggregation queries to improve query performance. + * Implemented paginating aggregations for enhanced memory efficiency with large datasets. + +* **Tests** + * Expanded test coverage for aggregation queries with HAVING conditions and complex filtering scenarios. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:None` + + +Can you explain why is this condition necessary? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HavingPushdownRule.java:None` + + +nit: we should unify the rule names to clearly distinguish the different Calcite internal rules. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HavingPushdownRule.java:None` + + +Could reuse existing Filter-Index, and Project-Index rule? 
+ + +### @penghuo on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_having1.yaml:None` + + +Having is no pushdown in DSL, But context include HAVING->>($0, 10). is it expected? + + +### @LantaoJin on `docs/user/optimization/optimization.rst:47` + + +remove all `needClean` and `searchDone` in plan. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationPaginatingIT.java:10` + + +Add `CalcitePPLAggregationPaginatingIT` and `CalcitePPLTpchPaginatingIT` to verifiy the results have no changes with paginating feature. + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +main change in yarm files is updating requestedTotalSize to limited value for composite aggregation. + + +## General Comments + + +### @LantaoJin + + +@coderabbitai review + + +--- + +# PR #4883: [Backport 2.19-dev]Refactor alias type field by adding another project with alias (#4881) + +**URL:** https://github.com/opensearch-project/sql/pull/4883 + +**Author:** @qianheng-aws + +**Created:** 2025-11-28T09:19:12Z + +**State:** MERGED + +**Merged:** 2025-11-28T12:26:29Z + +**Changes:** +91 -16 (9 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4881 commit https://github.com/opensearch-project/sql/commit/6f7eae0f04f8d05b087a8b1014799faae3a44479) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4881: Refactor alias type field by adding another project with alias + +**URL:** https://github.com/opensearch-project/sql/pull/4881 + +**Author:** @qianheng-aws + +**Created:** 2025-11-28T07:11:49Z + +**State:** MERGED + +**Merged:** 2025-11-28T08:32:11Z + +**Changes:** +90 -16 (9 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + 
+## Description + +### Description +Refactor alias type field by adding another project with alias + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4876 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + + +## Summary by CodeRabbit + +* **Bug Fixes** + * Enhanced processing of alias type fields during query planning and execution. + * Improved field projection and resolution for aliased columns in SQL queries. + +* **Tests** + * Added comprehensive test coverage and validation for alias type field handling in query explain plans. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4880: [Backport 2.19-dev] Remove unnecessary filter for DateHistogram aggregation (#4877) + +**URL:** https://github.com/opensearch-project/sql/pull/4880 + +**Author:** @LantaoJin + +**Created:** 2025-11-28T03:39:02Z + +**State:** MERGED + +**Merged:** 2025-11-28T06:23:36Z + +**Changes:** +184 -496 (31 files) + + +## Description + +(cherry picked from #4877 commit 2f64c40ceb0eb93ec2950788ff6fe545e93be4e6) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4879: [Backport 2.19-dev] Add `bucket_nullable` argument for `Streamstats` command + +**URL:** https://github.com/opensearch-project/sql/pull/4879 + +**Author:** @ishaoxy + +**Created:** 2025-11-27T08:11:46Z + +**State:** 
MERGED + +**Merged:** 2025-11-28T05:45:35Z + +**Changes:** +681 -173 (25 files) + + +## Description + +### Description +backport #4831 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4878: [Backport 2.19-dev] Convert `dedup` pushdown to composite + top_hits (#4844) + +**URL:** https://github.com/opensearch-project/sql/pull/4878 + +**Author:** @LantaoJin + +**Created:** 2025-11-27T07:58:39Z + +**State:** MERGED + +**Merged:** 2025-11-28T02:58:42Z + +**Changes:** +1224 -493 (74 files) + + +## Description + +(cherry picked from #4844 commit 5ceacb6945d6bacc36e037ce4da215e1cf031b56) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4877: Remove unnecessary filter for DateHistogram aggregation + +**URL:** https://github.com/opensearch-project/sql/pull/4877 + +**Author:** @LantaoJin + +**Created:** 2025-11-27T07:10:23Z + +**State:** MERGED + +**Merged:** 2025-11-28T03:33:55Z + +**Changes:** +185 -497 (31 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR includes 3 changes: +1. Remove unnecessary filter for DataHistogram aggregation, see details in #4873 +2. 
~~Correct the interval selection of DataHistogram in `CompositeAggregationBuilder.buildDateHistogram()`~~ +3. Add `head 10` to queries of `big5/queries/composite*.ppl` (composite aggregation in DSL has a default size 10) + +### Related Issues +Resolves #4873 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Summary by CodeRabbit + +* **Bug Fixes** + * Optimized query execution plans for composite aggregations with explicit result limiting. + * Simplified search query predicate handling for improved performance. + +* **Tests** + * Updated test expectations to YAML format for improved maintainability. + * Enhanced test assertion utilities with deprecation guidance for legacy methods. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:664` + + +Here is the primary code change for fixing item 1. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java:None` + + +Here is the primary code changes for fixing item 2. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java:None` + + +bullshit + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java:None` + + +What's different between fixed interval and calendar interval? 
From the description, it seems it should use calendar interval if the unit is calendar-aware. + +So using `DATE_FIELD_UNITS` here seems confusing. Also, why use `spanValue` instead of `unit`? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java:None` + + +It seems the comments in `DateHistogramValuesSourceBuilder::calendarInterval/fixedInterval` are incorrect and misleading. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java:None` + + +DSL doesn't support nM/Y...(n > 1) in either `calendar_interval` or `fixed_interval`. For PPL, we will then fail to push such cases down into date_histogram. + +One workaround or enhancement is converting `nM -> n * 30 days` and using fixed interval, but it would be inaccurate since the number of days per month or per year varies. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java:None` + + +As discussed offline, will revert this part to avoid breaking changes. We need to open an RFC to discuss the expected behavior. The current implementation might not be correct either. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4875: [Backport 2.19-dev] Remove count aggregation for sort on aggregate measure (#4867) + +**URL:** https://github.com/opensearch-project/sql/pull/4875 + +**Author:** @LantaoJin + +**Created:** 2025-11-27T05:53:13Z + +**State:** MERGED + +**Merged:** 2025-11-27T07:31:07Z + +**Changes:** +81 -78 (115 files) + + +## Description + +(cherry picked from #4867 commit 0ab2ba276ead0c9a1d382542e9f2078a2a73ef4a) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4874: [Backport 2.19-dev] Fix wrong parameter and return result logic for LogPatternAggFunction + +**URL:** https://github.com/opensearch-project/sql/pull/4874 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-27T05:33:39Z + +**State:** MERGED + +**Merged:** 2025-11-27T07:31:20Z + +**Changes:** +118 -16 (6 files) + + +## Description + +Backport 885230fa3e128921122f0aad55f71f50b6a3e36d from #4868. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4871: Add unified query transpiler API + +**URL:** https://github.com/opensearch-project/sql/pull/4871 + +**Author:** @dai-chen + +**Created:** 2025-11-26T19:23:43Z + +**State:** MERGED + +**Merged:** 2025-12-08T20:02:10Z + +**Changes:** +238 -39 (8 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR adds a new experimental **`UnifiedQueryTranspiler`** API to the `api` module. The goal is to provide a simple, reusable way to convert a Calcite `RelNode` plan into SQL texts via Calcite `SqlDialect`. 
For detailed use cases and API design considerations, please refer to the linked issue and `README.md`. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4870, #4820 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + + +## Summary by CodeRabbit + +* **Documentation** + * Expanded API documentation with comprehensive code examples and usage guides + * Added complete end-to-end workflow documentation + * Documented supported SQL dialects for transpilation + * Enhanced Development & Testing and Integration Guide sections + +* **New Features** + * Added query transpilation capabilities to convert queries between different SQL dialects + +* **Chores** + * Updated build configuration and added license headers + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4868: Fix wrong parameter and return result logic for LogPatternAggFunction + +**URL:** https://github.com/opensearch-project/sql/pull/4868 + +**Author:** @songkant-aws + +**Created:** 2025-11-26T09:15:46Z + +**State:** MERGED + +**Merged:** 2025-11-27T05:33:23Z + +**Changes:** +118 -16 (6 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Current patterns uses wrong `buffer_limit` parameter value passed from `max_sample_count`. Also, this PR fixes the logic of returning agg result of logPatternAggFunction. 
+ +### Related Issues +Resolves #4866 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + + +## Summary by CodeRabbit + +## Release Notes + +* **New Features** + * Added support for max_sample_count parameter to limit pattern search results. + +* **Bug Fixes** + * Improved null-safety checks in pattern parser to prevent errors with invalid aggregate data. + * Enhanced pattern buffer handling to properly track both patterns and log entries. + +* **Tests** + * Added integration test validating pattern search with sample count limits. + * Expanded test coverage for pattern aggregation with all parameters. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @peterzhuamazon + + +@coderabbitai review + + +--- + +# PR #4867: Remove count aggregation for sort on aggregate measure + +**URL:** https://github.com/opensearch-project/sql/pull/4867 + +**Author:** @LantaoJin + +**Created:** 2025-11-26T09:02:34Z + +**State:** MERGED + +**Merged:** 2025-11-27T03:35:29Z + +**Changes:** +81 -78 (115 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev`, `clickbench` + + +## Description + +### Description +Remove count aggregation for sort-on-measure case + +### Related Issues +Resolves #4862 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + + +## Summary by CodeRabbit + +## Release Notes + +* **Bug Fixes** + * Standardized aggregation result sorting to use consistent metrics instead of custom aliases, improving sort order reliability in aggregated queries. + * Optimized sub-aggregation handling to eliminate unnecessary default aggregations when not needed. + +* **Tests** + * Updated test expectations to reflect aggregation sorting improvements and sub-aggregation optimizations. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +cc @noCharger + + +### @peterzhuamazon + + +@coderabbitai review + + +--- + +# PR #4865: [Backport 2.19-dev] fix clickbench query 43 + +**URL:** https://github.com/opensearch-project/sql/pull/4865 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-26T08:17:49Z + +**State:** MERGED + +**Merged:** 2025-11-26T09:04:16Z + +**Changes:** +9 -11 (2 files) + + +## Description + +Backport b88bf567ad20dadc5742d2cedc9e8c7f0427f617 from #4861. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4864: [Backport 2.19-dev] Update syntax: like(string, PATTERN[, case_sensitive]) (#4837) + +**URL:** https://github.com/opensearch-project/sql/pull/4864 + +**Author:** @LantaoJin + +**Created:** 2025-11-26T08:10:12Z + +**State:** MERGED + +**Merged:** 2025-11-26T09:48:37Z + +**Changes:** +559 -95 (54 files) + + +## Description + +(cherry picked from #4837 commit 164c3f08754ac6b945c7dfa9a0bf8041e1a6671a) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4863: [Backport 2.19-dev] Specify timestamp field with `timefield` in timechart command (#4784) + +**URL:** https://github.com/opensearch-project/sql/pull/4863 + +**Author:** @yuancu + +**Created:** 2025-11-26T07:54:53Z + +**State:** MERGED + +**Merged:** 2025-11-26T09:04:24Z + +**Changes:** +175 -147 (13 files) + + +## Description + +### Description + +Backport #4784 to 2.19-dev + +### Commit Messages + +* Support param timefield to specify span field in timechart + + + +* Update doc to introduce timefield parameter + + + +* Update ASTBuilderTest for chart: default args are handled in rel node visitor + + + +* Fix ast expression builder test + + + +* Fix anomanyzer test + + + +* Support using specified timefield in per functions + + + +* Omit by-timestamp clause in timechart command + + + +* Mask timefield argument in anonymizer + + + +* Anonymize argument span + + + +--------- + + +(cherry picked from commit afc98dd6757288b8b269b78d8e2e5f323f78cae6) + + + +### Related Issues +Resolves #4576 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4861: fix clickbench query 43 + +**URL:** https://github.com/opensearch-project/sql/pull/4861 + +**Author:** @xinyual + +**Created:** 2025-11-26T05:36:01Z + +**State:** MERGED + +**Merged:** 2025-11-26T08:13:13Z + +**Changes:** +9 -11 (2 files) + +**Labels:** `testing`, `backport 2.19-dev`, `clickbench` + + +## Description + +### Description +Change clickbench query 43 to match the spark query: https://github.com/ClickHouse/ClickBench/blob/20eba442bfbb367566f5f05269a2ce94859ae998/spark/queries.sql#L43 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4858: [Backport 2.19-dev] Add `bucket_nullable` argument for `Eventstats` + +**URL:** https://github.com/opensearch-project/sql/pull/4858 + +**Author:** @ishaoxy + +**Created:** 2025-11-25T03:37:49Z + +**State:** MERGED + +**Merged:** 2025-11-25T05:31:18Z + +**Changes:** +201 -6 (11 files) + + +## Description + +### Description +backport #4817 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4857: Support timeouts for Calcite queries + +**URL:** https://github.com/opensearch-project/sql/pull/4857 + +**Author:** @Swiddis + +**Created:** 2025-11-25T01:11:20Z + +**State:** MERGED + +**Merged:** 2025-12-02T19:46:42Z + +**Changes:** +237 -44 (20 files) + +**Labels:** `PPL`, `maintenance`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev`, `v3.4.0` + + +## Description + +### Description +Patch to address a symptom of #4842. Currently, the only timeout logic we support is in the underlying DSL, which works if we submit a slow query to OpenSearch but fails if we have exhaustion in any other part of the code. During those failures, the sql-worker thread is frozen and takes 100% CPU until completion, which can be several hours (and potentially bring down cluster nodes). 
+ +This PR adds some basic configurable timeout handling, and puts interrupt points in our index enumeration and planning rules. + +### Related Issues +Related to #4842 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Summary by CodeRabbit + +* **New Features** + * Configurable PPL query timeout (default 300s) applied to execution and planning, interrupting long-running operations. + +* **Documentation** + * Added docs and usage example for plugins.ppl.query.timeout, including default, node scope, and dynamic update. + +* **Bug Fixes** + * Enforced timeouts across query execution, planning rules, and index scan iteration to prevent runaway operations and improve robustness. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + +## Reviews + + +### @penghuo - COMMENTED + + +Could u add IT to verify the exception throw correctly? + + +## Review Comments + + +### @LantaoJin on `docs/user/ppl/admin/settings.rst:None` + + +Haven't go to details, curious on the default value. +1. why 120s, can we increase this default value? The default value of `join.subsearch_timeout` in SPL is 120s, maybe this could be larger. +2. can we disable this timeout by set it to a negative value, (similar to `max_time_per_process` in SPL. 
+ + +### @Swiddis on `docs/user/ppl/admin/settings.rst:None` + + +Updated the default to 5 minutes + +From measuring production clusters the highest timeout I saw was 30 seconds, figured 120 was a decent default threshold since it can always be increased + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:43` + + +Not part of this PR (merge conflict resolution), I don't understand the comment either tbh + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java:83` + + +Nice, added + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java:None` + + +Nice + + +### @dai-chen on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:28` + + +Does this impact both SQL and PPL queries or only PPL? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java:61` + + +We can check later if `ScheduledExecutorService` inside `ThreadPool` already provides scheduling task with timeout. + + +### @Swiddis on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:28` + + +I only implemented it with Calcite in mind, but I think it applies to both (up to whatever hits OSQueryManager) + + +## General Comments + + +### @Swiddis + + +> Could u add IT to verify the exception throw correctly? 
+ +Per side-channel discussion, let's fast-follow this + + +--- + +# PR #4856: Support `mvmap` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4856 + +**Author:** @ahkcs + +**Created:** 2025-11-24T23:22:44Z + +**State:** MERGED + +**Merged:** 2025-12-16T22:21:42Z + +**Changes:** +499 -11 (15 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +## **Description** + +This PR adds support for the `mvmap` eval function, which iterates over each element of a multivalue array, applies a given expression, and returns a new array containing the transformed results. + +**Syntax:** +`mvmap(array, expression)` + +During iteration, the field name in the expression is implicitly bound to each element of the input array (e.g., `mvmap(arr, arr * 10)`). + +--- + +## **Implementation Details** + +### **Core Approach** + +`mvmap` is implemented as an alias for PPL's existing `TRANSFORM` operator, which applies a lambda function to each array element. + +To support implicit binding syntax—`mvmap(arr, arr * 10)` instead of requiring explicit lambda syntax (`mvmap(arr, x -> x * 10)`)—AST transformation logic was added in `AstExpressionBuilder.java`. + +### **AST Transformation Steps** + +1. Intercept `mvmap` function calls in `visitEvalFunctionCall()` +2. Extract the field name from the first argument (e.g., `arr`) +3. Wrap the second argument expression into a `LambdaFunction`, using the extracted field name as the parameter +4. Transform: + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @ykmr1224 - COMMENTED + + +Is it possible to refer other fields in the second parameter? + +e.g. `... 
| fields array, a | eval b = mvmap(array, array * a)` + +If yes, we might want to add test case for that. + + +### @dai-chen - DISMISSED + + +Added a minor comment. Thanks for the changes! + + +## Review Comments + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +it's cleaner to handle this in its own visit method. + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +I'm not sure if I understand this part. Why could `secondArg` be a lambda already? Why do we do nothing if `firstArg` doesn't have a field name? Both are illegal according to the grammar, right? + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +What's the expected behavior if the first arg is a complex expression? From the code, it seems we try to find the deeply nested field and bind it to the lambda? + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Hi Chen, I think it makes sense to make both cases illegal, I have updated the code to throw `SyntaxCheckException` + +We allowed this initially because we use `TRANSFORM` for the implementation and wanted to keep the lambda input as an option, but I think it's better to throw an exception. + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Yes. 
For complex expressions like mvmap(mvindex(arr, 1, 3), arr * 10): + + - At parse time: we recursively extract the field name (arr) from the nested function to use as the lambda variable name → mvmap(mvindex(arr, 1, 3), arr -> arr * 10) + - At runtime: iteration happens over the evaluated result of mvindex(arr, 1, 3) + +Also added IT and UT to confirm + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Also added documentation in collection.rst + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Updated, Added dedicated grammar rule `mvmapFunctionCall` and `visitMvmapFunctionCall()` method + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:486` + + +np: use visitor to simplify and cover all cases + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:486` + + +Refactored to use `AbstractNodeVisitor` + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Shall we visit each funcArg and return on first non-null (qualified name found)? + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Updated the `visitFunction` method to iterate through all function arguments and return on the first non-null qualified name found + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Thanks! This will also help answer Tomo's comment https://github.com/opensearch-project/sql/pull/4856#pullrequestreview-3553890924 which also ask clarify for the behavior. + + +## General Comments + + +### @ykmr1224 + + +@coderabbitai review + + +### @ahkcs + + +> Is it possible to refer other fields in the second parameter? +> +> e.g. `... | fields array, a | eval b = mvmap(array, array * a)` +> +> If yes, we might want to add test case for that. 
+ +Updated to support single-value field + + +--- + +# PR #4853: [Backport 2.19-dev] Support `mvdedup` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4853 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-24T18:48:06Z + +**State:** MERGED + +**Merged:** 2025-11-25T05:16:58Z + +**Changes:** +206 -0 (8 files) + +**Labels:** `enhancement`, `PPL` + + +## Description + +Backport 5049a034636eda9776174734fc6c6e331d0986c2 from #4828. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4852: [Backport 2.19-dev] Grouping key field type can only be overwritten when the `ExprCoreType`s are different + +**URL:** https://github.com/opensearch-project/sql/pull/4852 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-24T09:14:50Z + +**State:** MERGED + +**Merged:** 2025-11-24T12:12:07Z + +**Changes:** +59 -3 (2 files) + + +## Description + +Backport 1c27a6db9b570b34e585cc42987a71f2f53bb8f3 from #4850. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4851: [Backport 2.19-dev]Remove redundant push-down-filters derived for bucket-non-null agg (#4843) + +**URL:** https://github.com/opensearch-project/sql/pull/4851 + +**Author:** @qianheng-aws + +**Created:** 2025-11-24T09:13:39Z + +**State:** MERGED + +**Merged:** 2025-11-25T03:07:02Z + +**Changes:** +172 -62 (36 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/ede63cb7b0c4976c818d3dada2447ee1985d43b5 from https://github.com/opensearch-project/sql/pull/4843. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @qianheng-aws + + +``` +Tests with failures: + - org.opensearch.sql.ppl.dashboard.NfwPplDashboardIT.testTopTCPFlags + - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopTalkersByIPs + - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopTalkersByPackets + - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopDestinationsByPackets + - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopDestinationsByIPs + - org.opensearch.sql.ppl.dashboard.WafPplDashboardIT.testTopTerminatingRules + ``` + + @LantaoJin @yuancu Have you met this failure before? It's weird I cannot reproduce them locally with the given command, e.g. + + +``` +./gradlew ':integ-test:integTest' --tests 'org.opensearch.sql.ppl.dashboard.NfwPplDashboardIT.testTopTCPFlags' -Dtests.seed=61CD58CB24A1BC70 -Dtests.security.manager=false -Dtests.locale=pt -Dtests.timezone=Asia/Saigon -Druntime.java=11 +``` + + +### @qianheng-aws + + +> ``` +> Tests with failures: +> - org.opensearch.sql.ppl.dashboard.NfwPplDashboardIT.testTopTCPFlags +> - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopTalkersByIPs +> - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopTalkersByPackets +> - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopDestinationsByPackets +> - org.opensearch.sql.ppl.dashboard.VpcFlowLogsPplDashboardIT.testTopDestinationsByIPs +> - org.opensearch.sql.ppl.dashboard.WafPplDashboardIT.testTopTerminatingRules +> ``` +> +> @LantaoJin @yuancu Have you met this failure before? It's weird I cannot reproduce them locally with the given command, e.g. 
+> +> ``` +> ./gradlew ':integ-test:integTest' --tests 'org.opensearch.sql.ppl.dashboard.NfwPplDashboardIT.testTopTCPFlags' -Dtests.seed=61CD58CB24A1BC70 -Dtests.security.manager=false -Dtests.locale=pt -Dtests.timezone=Asia/Saigon -Druntime.java=11 +> ``` + +Implicitly adding sort order on all fields to ensure sequence of the final results. + + +--- + +# PR #4850: Grouping key field type can only be overwritten when the `ExprCoreType`s are different + +**URL:** https://github.com/opensearch-project/sql/pull/4850 + +**Author:** @LantaoJin + +**Created:** 2025-11-24T06:44:51Z + +**State:** MERGED + +**Merged:** 2025-11-24T09:02:21Z + +**Changes:** +59 -3 (2 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +This bug introduced by https://github.com/opensearch-project/sql/pull/4500 by overwriting a new OpenSearchDateType which the custom formats are missing. +How to fix: +We only overwrite the group field' type when their `ExprCoreType`s are different. + +### Related Issues +Resolves #4845 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4849: [Backport 2.19-dev] Perform RexNode expression standardization for script push down. 
(#4795) + +**URL:** https://github.com/opensearch-project/sql/pull/4849 + +**Author:** @qianheng-aws + +**Created:** 2025-11-24T05:36:54Z + +**State:** MERGED + +**Merged:** 2025-11-24T08:57:08Z + +**Changes:** +810 -425 (82 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/a7c56870d1dcbb0709246768369ef119ad9bf4cd from https://github.com/opensearch-project/sql/pull/4795. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4848: [Backport 2.19-dev] Fix the flaky CalcitePPLTcphIT + +**URL:** https://github.com/opensearch-project/sql/pull/4848 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-24T05:17:19Z + +**State:** MERGED + +**Merged:** 2025-11-24T08:58:17Z + +**Changes:** +33 -9 (3 files) + + +## Description + +Backport c0f56806123eb46e2d6323c6cd15cbdde08b230d from #4846. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4846: Fix the flaky CalcitePPLTcphIT + +**URL:** https://github.com/opensearch-project/sql/pull/4846 + +**Author:** @LantaoJin + +**Created:** 2025-11-22T10:09:00Z + +**State:** MERGED + +**Merged:** 2025-11-24T04:57:17Z + +**Changes:** +33 -8 (3 files) + +**Labels:** `flaky-test`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Fix the flaky CalcitePPLTcphIT + +### Related Issues +Resolves #4261 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java:157` + + +Can you explain how blocking the index load relates to this integration test fix? The connection isn't clear to me from the current changes. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java:157` + + +Not related to the fix. I found this test was ignored due to the bucket size 1000 limitation. Just correct the results and enable it again. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4844: Convert `dedup` pushdown to composite + top_hits + +**URL:** https://github.com/opensearch-project/sql/pull/4844 + +**Author:** @LantaoJin + +**Created:** 2025-11-21T15:31:40Z + +**State:** MERGED + +**Merged:** 2025-11-26T08:12:05Z + +**Changes:** +1207 -477 (74 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Convert `dedup` pushdown to composite + top_hits +Main changes: +1. Add a rule `DedupPushdownRule` to convert the dedup plan pattern to Aggregate with TopHits metrics +2. Upgrade TopHitsParser to support parsing data from `_source` (fetchField cannot work for OS Object type) +3. Corresponding API change on `MetricParser`: `Map parse(Aggregation aggregation);` -> `List> parse(Aggregation aggregation);` + +Follow-ups: Support dedup on script (expression) + +### Related Issues +Resolves #4797 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:363` + + +Can you elaborate more on why calling this method here? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +nit: `helper` seems never used? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml:None` + + +We won't have FILTER->IS NOT NULL($4) push down for common aggregate push down, after this PR: https://github.com/opensearch-project/sql/pull/4843 + +Could you check if we can remove them as well for dedup push down? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:165` + + +LITERAL_AGG in Calcite has totally different function as we use here. It seems to be tricky to implement this in this way. Do we have alternative approach? Or at least add some comments to notice that. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java:None` + + +Seems used nowhere. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Can you elaborate more on why we set `fetchSource(false)` here? The default value is true? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +[question] Is this the only case that agg metric parser should return a List of value for each bucket? I'm thinking Is it still appropriate to translate `dedup x` to `aggregate`? Normally speaking, `aggregate` should return only 1 row value for the metric in each bucket. While `dedup x` can return more than 1 rows and it now affects the API of agg metric parser. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:363` + + +Is it for identifying LITERAL_AGG? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:363` + + +Yes. `LITERAL_AGG` passes the `numberOfDedup` here + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +will remove it + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:165` + + +I don't have other alternative approach. I think it's safe since `LITERAL_AGG` usually used in Project removing for Aggregate. And PPL doesn't have explicit syntax to call `LITERAL_AGG`. I will add comment to elaborate. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Update: change to use `fetchSource` instead of `fetchField` due to `fetchField` cannot work on OS object type. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +> [question] Is this the only case that agg metric parser should return a List of value for each bucket? + +Yeah, just `top_hits` metric agg does. `top_hits` metric agg in OpenSearch can be used in any bucket agg in DSL as a sub-aggregation, it's quite different with SQL' aggregate. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java:None` + + +will remove + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml:None` + + +done + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1601` + + +Will `dedupe` work without ordering by deduped fields? 
+ + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java:76` + + +What is this field used for? It seems it's not referred anywhere + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:1828` + + +Isn't such case covered by `CalcitePPLDedupIT.testDedupExpr` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:551` + + +Can you check if we will never produce a LITERAL_AGG in RelBuilder or by some rules in planner? + +Otherwise, we may push down a real LITERAL_AGG to be tophits while it shouldn't be. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:551` + + +One alternative in my mind is using a self-defined class extends `calcite.Aggregate`. It should also be able to leverage our AggregateAnalyzer. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1601` + + +em, it should work in non-pushdown case. maybe we could remove this orderBy in window. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java:76` + + +yes. can be deleted in current impl + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1601` + + +It looks a little strange to me because the sort keys in a window should be the same (the partition key) + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:551` + + +> Can you check if we will never produce a LITERAL_AGG in RelBuilder or by some rules in planner? +> +> Otherwise, we may push down a real LITERAL_AGG to be tophits while it shouldn't be. + +Checked, so far no LITERAL_AGG can be produced because PPL doesn't support literal in aggregators. 
+In SQL +``` +SELECT + dept_id, + COUNT(*) as emp_count, + 1 as constant_val +FROM employees +GROUP BY dept_id; +``` +Could be rewritten to +``` +SELECT + dept_id, + COUNT(*) as emp_count, + LITERAL_AGG(1) as constant_val +FROM employees +GROUP BY dept_id; +``` +to reduce a Project upon Aggregate. + +From +``` +Project(dept_id, emp_count, constant_val=1) + Aggregate(GROUP BY dept_id, compute COUNT(*) as emp_count) + Scan(employees) +``` +To +``` +Aggregate(GROUP BY dept_id, compute COUNT(*) as emp_count, compute LITERAL_AGG(1) as constant_val) + Scan(employees) +``` +But `stats` only support pre-defined aggregators. cc @qianheng-aws + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1601` + + +Yes. it was not introduced by this pr. let's fix them in followup PR since it will change the plan a lot. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:551` + + +I see `SubQueryRemoveRule` will produce LITERAL_AGG for `some`(we don't have) and `in` subquery. But I'm not sure whether it will be triggered. + +And RelBuilder's literalAgg method is public, we should avoid call that method by developers then. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:551` + + +https://issues.apache.org/jira/browse/CALCITE-4334 + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4843: Remove redundant push-down-filters derived for bucket-non-null agg + +**URL:** https://github.com/opensearch-project/sql/pull/4843 + +**Author:** @qianheng-aws + +**Created:** 2025-11-21T08:29:08Z + +**State:** MERGED + +**Merged:** 2025-11-24T07:07:41Z + +**Changes:** +164 -56 (32 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +In our current implementation/final plan, we always push down a redundant filter of ISNOTNULL for group fields if params `BUCKET_NULLABLE=false` or the group field is time span. However, since we've set `MISSING_BUCKET` to false in the aggregation builder, we actually don't need this filter which will introduce more overhead especially the filter is pushed down to be a script. + +This PR enhance the above case by adding a new rule, in which we detect the derived filter from bucket-non-null agg and then ignore them in the push down process. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4811 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml:11` + + +why the groupset and SORT_AGG_METRICS digest changed. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java:None` + + +could be simplified to `rex.isA(SqlKind.IS_NOT_NULL)` + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml:11` + + +Before this PR, the plan can only match the rule `agg-filter-project-scan` after being transformed by some calcite's transpose rule and project merge rule. So the index in group set is different from the original plan(i.e. logical plan). + +After adding another new rule `agg-project-filter-project-scan`, the original plan can match this new one after trimming(it will add another project before scan), so the new digest has exactly the same ref index in the group set as logical plan. + +But overall, though the aggregation digest is different, their generated request builders are exactly the same. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java:None` + + +nit: I feel it's a little confusing because its semantics seem to be *bucket not nullable* + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java:None` + + +You're right. That's a typo. Will correct it. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4841: [BugFix] Fix Memory Exhaustion for Multiple Filtering Operations in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4841 + +**Author:** @RyanL1997 + +**Created:** 2025-11-21T04:46:35Z + +**State:** MERGED + +**Merged:** 2025-12-02T21:40:20Z + +**Changes:** +291 -126 (37 files) + +**Labels:** `PPL`, `backport 2.19-dev`, `v3.4.0`, `bugFix` + + +## Description + +### Description +Implemented a filter accumulation mechanism that combines multiple filter conditions into a single Filter RelNode before Calcite's optimization phase begins, preventing the deep Filter chains that trigger combinatorial explosion. + +### Implementation Summary + +The fix introduces automatic detection and accumulation of filtering operations: + +1. **Automatic Detection**: When a query is analyzed, the system counts all filtering operations ( e.g. `regex` and `where`) in the AST. If 2 or more filtering operations are detected, filter accumulation mode is automatically enabled. + +2. **Filter Accumulation**: Instead of creating individual Filter RelNodes for each `regex` or `where` operation, all filter conditions are collected in a list during the analysis phase. + +3. **Single Combined Filter**: All accumulated conditions are combined with AND operations into a single Filter RelNode, preventing the deep chains that caused memory exhaustion. + +4. **Schema-Aware Flushing**: Accumulated filters are flushed before any schema-changing operations (like `fields`) to ensure field references remain valid. + +5. **No change needed for command implementations**: The fix is completely automatic - no query rewriting or user action required. Queries produce identical results to the original implementation. + +### How It Works: Before vs After + +#### Before (Memory Explosion) +```bash +Query: source=t | regex f1="..." | regex f2="..." | ... | regex f10="..." 
| fields f1 + +Analysis Phase: + Filter(regex10) + └─ Filter(regex9) + └─ Filter(regex8) + └─ ... (deeply nested) + └─ Scan(t) + +Optimization Phase (FilterMergeRule): + - Tries to merge filters + - Generates all possible orderings: 10! = 3,628,800 combinations + - Each combination creates intermediate RelNode objects + - Memory exhaustion before execution even starts +``` + +#### After (Efficient Single Filter) +```bash +Query: source=t | regex f1="..." | regex f2="..." | ... | regex f10="..." | fields f1 + +Analysis Phase: + Filter(regex1 AND regex2 AND ... AND regex10) // Single combined filter + └─ Scan(t) + +Optimization Phase (FilterMergeRule): + - Only one Filter node to optimize + - No combinatorial explosion + - Memory usage remains constant regardless of filter count +``` + +### Results +- Memory usage is now constant regardless of the number of regex operations +- Queries with 10+ regex clauses complete successfully +- No combinatorial explosion during Calcite optimization +- Performance improved significantly as Calcite doesn't waste time on filter reordering +- Solution is automatic - no query rewriting required + +### Related Issues +* Relate #4842 as a quick fix for now + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Summary by CodeRabbit + +* **New Features** + * Query engine now consolidates adjacent filters into single combined predicates, producing simpler, more efficient plans. + +* **Bug Fixes** + * Eliminated unnecessary nested filters and redundant NULL checks to improve filter pushdown and plan clarity. 
+ +* **Tests** + * Added integration tests covering merged-filter scenarios and behaviour with consecutive WHERE/IS NOT NULL/range filters. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + +## Reviews + + +### @penghuo - DISMISSED + + +Code looks good. 2 concerns regarding to performance impact. We should monitor the Big5 benchmark closely as next step. +1. Introduce HepPlanner rules. +2. After filter_merge, exists filters were added. Because missing_bucket=false, these filters are redundant. + + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +This means we still have the same problem with `where` clauses that have function calls, right? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Can we avoid this change? The concern is that every visitorXXX would need to perform extra work to merge filters. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Could we apply FILTER_MERGE rule on RelNode? Here are 2 options, +1. Apply Calcite CoreRules.FILTER_MERGE before VolcanoPlanner plan +2. Customized a RelNode visitor and apply FILTER_MERGE rule. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +change to static. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:278` + + +not sure performance impact, did u verify? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +does this change needed? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +revert unnecessary change. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +good catch, this is actually coming from the previous approach. 
The `analyze` method on line just calls `unresolved.accept(this, context)`. So on line 2250, calling `analyze(prunedSubSearch, context)` is functionally equivalent to the original `prunedSubSearch.accept(this, context)`. I have reverted it back. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +good point. Fixed + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +fixed. good catch + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +fixed + + +### @penghuo on `integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml:11` + + +After filter_merge, three exists filters were added. Because missing_bucket=false, these filters are redundant. +``` +{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}} +``` +I did not expect any performance regression from this change, but we should monitor the Big5 benchmark closely. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:278` + + + I just scripted a mini benchmark break down by directly leverage the clickbench IT queries. The following report shows the detailed performance of each planning phase - in summary, performance testing shows filter merge adds only 0.19ms average overhead (10% of planning time, <1% of total query time). 
+ +```bash +> python3 analyze_performance.py + +Analyzing log file: /Users/jiallian/Desktop/opensearch/sql-team/cve-fix/sql/integ-test/build/testclusters/integTest-0/logs/integTest.log +Using test log for query names: /Users/jiallian/Desktop/opensearch/sql-team/cve-fix/sql/performance_results.log + +================================================================================ +FILTER MERGE PERFORMANCE ANALYSIS +================================================================================ + +📊 OVERALL STATISTICS (168 queries) +-------------------------------------------------------------------------------- +Filter Merge Time: + Mean: 186 μs ( 0.19 ms) + Median: 103 μs ( 0.10 ms) + Std Dev: 197 μs + Min: 41 μs ( 0.04 ms) + Max: 1541 μs ( 1.54 ms) + +Total Planning Time: + Mean: 1870 μs ( 1.87 ms) + Median: 1750 μs ( 1.75 ms) + +Filter Merge as % of Planning: + Mean: 9.87% + Median: 6.22% + Max: 47.52% + +================================================================================ +📈 PERFORMANCE ASSESSMENT +-------------------------------------------------------------------------------- +Average overhead: 0.19ms (9.9% of planning) +Recommendation: No optimization needed. Merge immediately. 
+ +================================================================================ +📊 PERCENTILE ANALYSIS +-------------------------------------------------------------------------------- +Filter Merge Time Percentiles: + p50: 105 μs ( 0.10 ms) + p95: 477 μs ( 0.48 ms) + p99: 1541 μs ( 1.54 ms) + +================================================================================ +⏱️ PLANNING PHASE BREAKDOWN +-------------------------------------------------------------------------------- +Phase Averages: + Analyze: 1672 μs ( 89.4%) + Filter Merge: 186 μs ( 10.0%) ← THIS IS WHAT WE ADDED + Optimize: 9 μs ( 0.5%) + Convert: 0 μs ( 0.0%) + TOTAL: 1870 μs (100.0%) + +================================================================================ +🐢 TOP 10 SLOWEST FILTER MERGE TIMES +-------------------------------------------------------------------------------- +Rank Query Avg Merge Time Max Merge Time % of Planning +-------------------------------------------------------------------------------- +1 Query46 1541 μs ( 1.54ms) 1541 μs ( 1.54ms) 47.5% +2 Query29 543 μs ( 0.54ms) 543 μs ( 0.54ms) 25.5% +3 Query24 529 μs ( 0.53ms) 529 μs ( 0.53ms) 24.5% +4 Query54 513 μs ( 0.51ms) 513 μs ( 0.51ms) 18.8% +5 Query44 477 μs ( 0.48ms) 477 μs ( 0.48ms) 16.1% +6 Query23 445 μs ( 0.45ms) 445 μs ( 0.45ms) 22.9% +7 Query15 390 μs ( 0.39ms) 390 μs ( 0.39ms) 19.9% +8 Query71 388 μs ( 0.39ms) 388 μs ( 0.39ms) 20.4% +9 Query16 377 μs ( 0.38ms) 377 μs ( 0.38ms) 17.8% +10 Query55 351 μs ( 0.35ms) 351 μs ( 0.35ms) 18.9% + +================================================================================ +📈 DISTRIBUTION ANALYSIS +-------------------------------------------------------------------------------- +Filter Merge Time Distribution: + <100μs 82 ( 48.8%) ████████████████████████ + 100-500μs 78 ( 46.4%) ███████████████████████ + 500-1000μs (1ms) 6 ( 3.6%) █ + 1-5ms 2 ( 1.2%) + 5-10ms 0 ( 0.0%) + >10ms 0 ( 0.0%) + 
+================================================================================ +📄 Detailed CSV exported to: /Users/jiallian/Desktop/opensearch/sql-team/cve-fix/sql/performance_analysis.csv +================================================================================ +``` + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +The entire design has changed - right now we apply Calcite `CoreRules.FILTER_MERGE` before `VolcanoPlanner` plan + + + +## General Comments + + +### @RyanL1997 + + +Transferring some of the communication with @penghuo here: + +First, I agree that the previous implementation infected too much of the original calcite relnode visitor class logic, by applying +```java +... + flushFiltersBeforeSchemaChange(context); +... +``` +to plethora of `visit()` logic, which is not convenient for future development of PPL commands with this fix. + +Second, I tried the both @penghuo's suggestions +> Apply Calcite CoreRules.FILTER_MERGE before VolcanoPlanner plan +Customized a RelNode visitor and apply FILTER_MERGE rule. + +with [3dfd44b](https://github.com/opensearch-project/sql/pull/4841/commits/3dfd44b476c21395f0a7a8e763be15435e5535ab) and [ad43837](https://github.com/opensearch-project/sql/pull/4841/commits/ad43837802ace79eaff2499580b859967db4ef0e) and both of them works. 
+ +According to the above, I selected the approach of applying Calcite's FilterMergeRule directly in QueryService.java (using HepPlanner) for the following reasons: + +- Tree construction happens in `CalciteRelNodeVisitor`, while optimization is applied as a post-processing step in `QueryService` +- Leverages the existing `FilterMergeRule.Config.DEFAULT.toRule()` instead of custom visitor logic +- The filter merge happens in the same place for both production execution (`executeWithCalcite()`) and explain queries (`explainWithCalcite()`) + + + +--- + +# PR #4840: [Backport 2.19-dev] Fix search anoymizer only (#4783) + +**URL:** https://github.com/opensearch-project/sql/pull/4840 + +**Author:** @xinyual + +**Created:** 2025-11-21T02:33:32Z + +**State:** MERGED + +**Merged:** 2025-11-21T03:08:44Z + +**Changes:** +136 -22 (15 files) + + +## Description + +### Description +Backport #4783 to 2.19 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@yuancu please correct the merge information next time + + + +### @yuancu + + +> @yuancu please correct the merge information next time + +Sorry, it was too late by the time I realized it + + +--- + +# PR #4839: Support `mvfind` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4839 + +**Author:** @ahkcs + +**Created:** 2025-11-20T21:23:33Z + +**State:** MERGED + +**Merged:** 2025-12-16T22:15:36Z + +**Changes:** +686 -1 (11 files) + +**Labels:** `enhancement`, `PPL` + + +## Description + +## Description + + This PR implements the `mvfind` eval function for PPL, which searches a multivalue array and returns the 0-based index of the first element matching a regular expression pattern. Returns NULL if no match is found. + + ## Implementation Approach + + ### Attempted: Native Calcite SQL Functions + We initially explored using native Calcite SQL functions to avoid creating a UDF. We tried ARRAY-related Calcite functions like `ARRAY_POSITION`, but we didn't find any functions that support REGEX. For functions that do support REGEX (like `REGEXP_CONTAINS`, `REGEXP_LIKE`), they all take string as a parameter instead of array. + + ### Final Solution: UDF Implementation + Implemented as a UDF following the pattern of `EXISTS` and `FORALL` functions, using Java's `Pattern` and `Matcher` for regex matching. 
+ + ## Examples + + ```sql + -- Basic pattern matching + source=people | eval result = mvfind(array('apple', 'banana', 'apricot'), 'ban.*') | fields result + > 1 + + -- No match returns NULL + source=people | eval result = mvfind(array('cat', 'dog', 'bird'), 'fish') | fields result + > null + + -- Regex with character class + source=people | eval result = mvfind(array('error123', 'info', 'error456'), 'error[0-9]+') | fields result + > 0 + + -- Case-insensitive matching + source=people | eval result = mvfind(array('Apple', 'Banana', 'Cherry'), '(?i)banana') | fields result + > 1 + +``` + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Should it be NullPolicy.ANY? (looks it returns null when one of the parameter is null) + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Is it possible to avoid compiling everytime? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:51` + + +Does type coercion work and accept number as regex? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:159` + + +nit: I'd recommend having separate method `mvfind(List array, String regex)` to separate preprocess and core logic. 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Does this lead invalid regex to be 5xx error? Can we check regex earlier (during planning) and raise 4xx error instead? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Updated to NullPolicy.ANY + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Moved the compiling to planning time + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:51` + + +Updated to support type coercion, also added corresponding UT + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:159` + + +Updated to separate method + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Updated error handling + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:51` + + +I thought `CoercionUtils` would automatically add cast. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +nit: seems redundant comment. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +nit: Let's extract for maintainability/readability. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Seems it is used only by `evalWithString`. Let's move it to just below `evalWithString`. 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +I think calcite automatically return null in case of null parameter. + + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImplTest.java:None` + + +nitnit: redundant comment + + +### @ahkcs on `core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImplTest.java:None` + + +Removed + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Updated comment + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Updated + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Updated + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Removed + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:51` + + +After checking, the tests would fail if we go through CoercionUtils, it seems that we don't have coercion rule for Integer -> String? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:70` + + +There seems no IT to cover non-literal regex. Just wonder is this required and supported in other PPL regex function/command? Not sure what's the performance impact for this. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +For the second argument, I think type coercion should be defined and performed in single place? 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:None` + + +Hi Chen, after checking, I updated to restrict regex parameter to STRING type only. Removed implicit type coercion +(integer -> string) since the regex pattern should always be a string. +Non-string types will now fail at type checking. + + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java:70` + + +Updated to add an IT `testMvfindWithDynamicRegex` + + +## General Comments + + +### @ykmr1224 + + +Can you resolve conflict? + + +--- + +# PR #4838: Execute yamlRestTest in integration job + +**URL:** https://github.com/opensearch-project/sql/pull/4838 + +**Author:** @ykmr1224 + +**Created:** 2025-11-20T20:02:35Z + +**State:** MERGED + +**Merged:** 2025-11-21T00:46:44Z + +**Changes:** +4 -4 (1 files) + +**Labels:** `maintenance` + +**Assignees:** @ykmr1224 + + +## Description + +### Description +- In Github workflow, execute yamlRestTest in `integration` job instead of `unit` + - Just for consistency (I felt weird to see the yamlRestTest failure in `unit` result) +- Checked workflow execution result. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4837: Update syntax: like(string, PATTERN[, case_sensitive]) + +**URL:** https://github.com/opensearch-project/sql/pull/4837 + +**Author:** @LantaoJin + +**Created:** 2025-11-20T15:35:15Z + +**State:** MERGED + +**Merged:** 2025-11-25T05:46:34Z + +**Changes:** +561 -95 (54 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `clickbench`, `bugFix` + + +## Description + +### Description +1. We miss a case-sensitive `like` function. +2. To align with SparkSQL and SPL behavior, add case-sensitive argument to`like` function and add a new case-sensitive `ilike` function. + +LIKE +---- + +Usage: like(string, PATTERN[, case_sensitive]) return true if the string match the PATTERN. ``case_sensitive`` is optional. When set to ``true``, PATTERN is **case-sensitive**. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. + + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``case_sensitive`` defaults to ``false`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``case_sensitive`` defaults to ``true`` + + +**Plan**: +`where like(firstname, '%mbe%', true)`(PPL) +⬇️ +`LogicalFilter(condition=[LIKE($1, '%mbe%', '\')])`(RelNode) +⬇️ +`WHERE firstname LIKE '%mbe%' ESCAPE '\'`(SparkSQL) + +ILIKE +---- + +Usage: ilike(string, PATTERN) return true if the string match the PATTERN, PATTERN is **case-insensitive**. + + +**Plan** +`where ilike(firstname, '%mbe%')` or `where like(firstname, '%mbe%', false)`(PPL) +⬇️ +`LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')])`(RelNode) +⬇️ +`WHERE firstname ILIKE '%mbe%' ESCAPE '\'`(SparkSQL) + +### Related Issues +Resolves #4835 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java:130` + + +Is this duplicated with `test_like_should_be_case_sensitive` + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java:130` + + +you are right, this test was removed from CalciteLikeQueryIT to LikeQueryIT in last commit. It was partially duplicated with `test_like_should_be_case_sensitive` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4831: Add `bucket_nullable` argument for `Streamstats` command + +**URL:** https://github.com/opensearch-project/sql/pull/4831 + +**Author:** @ishaoxy + +**Created:** 2025-11-20T09:17:11Z + +**State:** MERGED + +**Merged:** 2025-11-26T05:38:41Z + +**Changes:** +672 -171 (25 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Similar with `stats` and `eventstats`, now we add `bucket_nullable` argument for `streamstats`. +ref: #4817 + +### Related Issues +Resolves #4802 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @yuancu - DISMISSED + + +LGTM overall + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1758` + + +I think we need a similar work with following code to add a hint to the Project (A better way is adding hint on LogicalWindow but seems we cannot do it now). So does `eventstats`. This hint is useful during plan optimizing. https://github.com/opensearch-project/sql/blob/a8069d18a360396594d4c47e672babc56a21a2fa/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java#L1057 + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1758` + + +OK, will try. + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +nitpick: It looks a little redundant to me because the default value is already set in `getArgumentList(StreamstatsCommandContext, Settings)`. It may cause confusion that the default value is `TRUE` instead of depending on the setting. + + +### @ishaoxy on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Thanks for the suggestion! But let me just keep this code to align with command `stats`. + + +### @ishaoxy on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Fixed now. + + +## General Comments + + +### @LantaoJin + + +@ishaoxy please backport^ + + +--- + +# PR #4830: Support sort expression pushdown for SortMergeJoin + +**URL:** https://github.com/opensearch-project/sql/pull/4830 + +**Author:** @songkant-aws + +**Created:** 2025-11-20T07:43:05Z + +**State:** MERGED + +**Merged:** 2025-12-10T05:54:04Z + +**Changes:** +431 -127 (17 files) + +**Labels:** `enhancement`, `backport-manually` + + +## Description + +### Description +We observed there are some common user queries following the pattern like `source=tableA | rex '......' | join left=a right=b on a.rex_field=b.field tableB`.
The join keys reference an expression output instead of an original table field. This query will be transformed to SortMergeJoin where it sorts by join keys on each join child. + +SortMergeJoin inserts EnumerableSort operators at physical plan optimization. Previous sort expression pushdown optimization #4750 only provides logical plan optimization, i.e., pushing down LogicalSort into the scan. This PR expands the ability to push down EnumerableSort by simple or complex sort expressions in case of SortMergeJoin. + +### Related Issues +Resolves #4823 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:1030` + + +I found that this query is not executable with the following error: + +``` +java.lang.RuntimeException: java.sql.SQLException: exception while executing query: class java.lang.Long cannot be cast to class java.lang.Integer (java.lang.Long and java.lang.Integer are in module java.base of loader 'bootstrap') + at org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.lambda$execute$2(OpenSearchExecutionEngine.java:217) ~[opensearch-sql-3.4.0.0-SNAPSHOT.jar:3.4.0.0-SNAPSHOT] +``` +It may result from the type difference between age and balance. I see quite a lot of type casts to `Integer` in the generated code. + +This should be another problem that's irrelevant to this optimization though. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:1030` + + +@songkant-aws has this issue been fixed? I don't see any IT for correctness.
can you add more tests in `CalcitePPLJoinIT`? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:1019` + + +add a case of +``` +source=opensearch-sql_test_index_bank +| rex field=lastname \\\"(?^[A-Z])\\\" +| join type=left max=1 initial opensearch-sql_test_index_bank +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:511` + + +could it check type family? + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:1030` + + +This is caused by type mismatch at runtime due to different index mapping types. The plan explanation can still pass. I think it's not a scope of this PR. Dynamic type conversion at runtime or type validation checker could potentially address it. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:511` + + +The SqlTypeFamily check is almost the same with above raw types check. Also, SqlTypeFamily check allows ANY type, which is not expected here. + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:1019` + + +Added this explain test, and some other IT test cases to verify correctness. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4829: Version bump 2.19-dev to 2.19.4 + +**URL:** https://github.com/opensearch-project/sql/pull/4829 + +**Author:** @Swiddis + +**Created:** 2025-11-20T00:30:50Z + +**State:** MERGED + +**Merged:** 2025-11-20T18:03:43Z + +**Changes:** +2 -2 (2 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Updates our 2.19 build artifact and the version of OS we're building against. Some weird bugs are coming from relying on OS 2.19.0's Netty version. + +### Related Issues +Any request with `Accept-Encoding: zstd` hangs with the 2.19.0 artifact. 
+ +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4828: Support `mvdedup` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4828 + +**Author:** @ahkcs + +**Created:** 2025-11-19T18:58:16Z + +**State:** MERGED + +**Merged:** 2025-11-24T18:47:51Z + +**Changes:** +206 -0 (8 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + + +
+

Description

+

This PR implements the mvdedup eval function for PPL, enabling users to remove duplicate values from multivalue arrays.

+
+

Behavior

+

Given the input:

+
source=index | eval result = mvdedup(array(1, 2, 2, 3, 1, 4))
+
+

The function returns:

+
[1, 2, 3, 4]
+
+
+

Key Details

+
    +
  • +

    Empty arrays return empty arrays

    +
  • +
  • +

    Order-preserving: The first appearance of each value is kept; subsequent duplicates are removed.

    +
  • +
+
+

Example

+ +Input | Output +-- | -- +array(1, 2, 2, 3, 1, 4) | [1, 2, 3, 4] +array() | [] + + + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Could you check if this has same semantic as `ARRAY_DISTINCT`? + +``` +2: jdbc:calcite:model=src/test/resources/mode> SELECT ARRAY_DISTINCT(ARRAY[1,2,1,3,3,4]); ++--------------+ +| EXPR$0 | ++--------------+ +| [1, 2, 3, 4] | ++--------------+ +1 row selected (0.018 seconds) + +2: jdbc:calcite:model=src/test/resources/mode> SELECT ARRAY_DISTINCT(ARRAY[4,1,2,1,3,3,4]); ++--------------+ +| EXPR$0 | ++--------------+ +| [4, 1, 2, 3] | ++--------------+ +``` + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVDedupCore.java:None` + + +nit: do we need separate class? If it is used only in MVDedupFunctionImpl, it is better have it in the same class. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVDedupFunctionImpl.java:None` + + +Do we need to accept non-array type? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVDedupFunctionImpl.java:None` + + +Should we add OperandMetadata to enable parameter type check? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVDedupCore.java:None` + + +Let's add comprehensive unit tests for the core logic. 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVDedupCore.java:None` + + +Resolving due to change on implementation + + +## General Comments + + +### @ahkcs + + +> Could you check if this has same semantic as `ARRAY_DISTINCT`? + +Hi @dai-chen, thanks for the suggestion! After checking, I think `ARRAY_DISTINCT` is suitable for our implementation for `mvdedup` eval function, I have updated the implementation to remove the UDF and use `ARRAY_DISTINCT` for implementation. + +cc @ykmr1224 + + + + +### @dai-chen + + +Do we need to backport? + + +### @ahkcs + + +> Do we need to backport? + +Yes + + +--- + +# PR #4827: [Backport 2.19-dev] Fix timezone dependent test failures #4367 + +**URL:** https://github.com/opensearch-project/sql/pull/4827 + +**Author:** @Swiddis + +**Created:** 2025-11-19T18:42:13Z + +**State:** MERGED + +**Merged:** 2025-11-27T05:48:37Z + +**Changes:** +100 -63 (8 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Manual backport of https://github.com/opensearch-project/sql/pull/4367. + +Tested locally with `-Duser.timezone=Asia/Tokyo`. + +### Related Issues +Test failures on 2.19-dev after the date rollover if in a timezone other than UTC + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4826: [Backport 2.19-dev] Doc update for `json_valid` + +**URL:** https://github.com/opensearch-project/sql/pull/4826 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-19T09:12:56Z + +**State:** MERGED + +**Merged:** 2025-11-24T09:02:43Z + +**Changes:** +26 -0 (1 files) + + +## Description + +Backport b5c8a545b386bc95a2c314a56a2a9a1bb0788d8a from #4803. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4825: Push down filters on nested fields as nested queries + +**URL:** https://github.com/opensearch-project/sql/pull/4825 + +**Author:** @yuancu + +**Created:** 2025-11-19T07:38:11Z + +**State:** MERGED + +**Merged:** 2026-01-15T06:27:03Z + +**Changes:** +508 -19 (20 files) + +**Labels:** `enhancement`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description + +Because [nested fields](https://docs.opensearch.org/latest/mappings/supported-field-types/nested/) are indexed as hidden documents, we cannot query them directly. Instead, we have to use the [nested query](https://docs.opensearch.org/latest/query-dsl/joining/nested/) to access them. 
+ +For example, for the following mapping: +```json +{ + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "items": { + "type": "nested", + "properties": { + "name": {"type": "keyword"} + } + } + } + } +} +``` + +if we want to use term query to match those items with name `banana`, instead of using: +```json +{ + "query": { + "term": { + "items.name": { + "value": "banana", + "boost": 1 + } + } + } +} +``` + +We should use +```json +{ + "query": { + "nested": { + "path": "items", + "query": { + "term": { + "items.name": { + "value": "banana", + "boost": 1 + } + } + } + } + } +} +``` + +Here, the `nested` clause *steps down* into the nested `items` field. It no longer has access to fields in the root document, nor fields in any other nested document. + +### Work items + + +### Related Issues +Resolves #4508 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +This PR doesn't count in the case that the filter is not pushed down. +``` +source=t | head 10000 | where items.name = "xx" +``` +Above query will return incorrect results. + +This issue is similar to the #3696 which I am working on refactor to avoid the correctness issue in non-pushdown case. + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1534` + + +Since we have supported Agg script and Sort script now, I think we need to identify whether it's a filter script before wrapping it with nested query. And please add a test on agg script with nested fields. 
+ + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1534` + + +- Script sort does not make use of `PredicateAnalyzer.QueryExpression`, thus does not interleave with the current change: +https://github.com/opensearch-project/sql/blob/c31227cd98e0030f7a1a89cf1819c6354e480104/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java#L499 +- The same is with field sort +https://github.com/opensearch-project/sql/blob/c31227cd98e0030f7a1a89cf1819c6354e480104/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java#L476 + +However, they still need nested queries to access nested fields. As a result, the following queries does not work (`author` is a nested field) +- script sort: `source=opensearch-sql_test_index_cascaded_nested | eval lower_name = lower(author.name) | sort lower_name` +- field sort: `source=opensearch-sql_test_index_cascaded_nested | sort author.name` + +Should I fix them in this PR or another one? + +For agg script, I added a test case. Yet I doubt I still miss many cases? Originally I thought they were only handled in `AggregateAnalyzer`, so I could raise another PR for nested fields in aggregations. + + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative_mutated.yaml:None` + + +why the PR add this yaml file? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_mutated.yaml:None` + + +ditto + + +### @yuancu on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative_mutated.yaml:None` + + +I accidentally added them. Removed. + +Thanks for pointing out. They were used only for local tests. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1525` + + +Do we have test for this case? 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1525` + + +Seems like a very corner case. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1525` + + +Yes, `testScriptFilterOnDifferentNestedHierarchyShouldThrow` tests this case. It happens when users try to operate on two nested fields of different levels. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4824: [3.4.0] Bump Gradle to 9.2.0 and GitHub Action JDK to 25 + +**URL:** https://github.com/opensearch-project/sql/pull/4824 + +**Author:** @LantaoJin + +**Created:** 2025-11-19T05:58:48Z + +**State:** MERGED + +**Merged:** 2025-11-20T06:48:18Z + +**Changes:** +810 -711 (55 files) + +**Labels:** `infrastructure`, `maintenance`, `v3.4.0` + + +## Description + +### Description +Bump Gradle to 9.2.0 and GitHub Action JDK to 25 +And corresponding changes (Gradle plugins): +- me.champeau.jmh 0.6.8 -> 0.7.3 (latest) +- com.diffplug.spotless 7.2.1 -> 8.1.0 (latest) +- googleJavaFormat 1.17.0 -> 1.32.0 (latest) +- info.solidsoft.pitest 1.9.0 -> 1.19.0-rc.2 (latest) + +### Related Issues +Resolves #4722 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `gradlew.bat:73` + + +curious why isn't this var `CLASSPATH` removed but reset to empty + + +### @LantaoJin on `gradlew.bat:73` + + +auto generated by +``` +./gradlew wrapper --gradle-version 9.2.0 --distribution-type all --gradle-distribution-sha256-sum 16f2b95838c1ddcf7242b1c39e7bbbb43c842f1f1a1a0dc4959b6d4d68abcac3 +``` + + +## General Comments + + +### @LantaoJin + + +cc @penghuo @peterzhuamazon + + +--- + +# PR #4821: [Backport 2.19-dev] Support escaped field names in SPath parsing + +**URL:** https://github.com/opensearch-project/sql/pull/4821 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-18T23:33:44Z + +**State:** MERGED + +**Merged:** 2025-11-19T18:47:54Z + +**Changes:** +72 -8 (9 files) + + +## Description + +Backport 5a68caabb0e6f7179eb35cad5a8492ff158ab7fe from #4813. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4818: [Backport 2.19-dev] Pushdown sort by complex expressions to scan (#4750) + +**URL:** https://github.com/opensearch-project/sql/pull/4818 + +**Author:** @songkant-aws + +**Created:** 2025-11-18T08:30:14Z + +**State:** MERGED + +**Merged:** 2025-11-19T09:07:39Z + +**Changes:** +1841 -40 (39 files) + + +## Description + +### Description +Backport #4750 + +### Related Issues + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4817: Add `bucket_nullable` argument for `Eventstats` + +**URL:** https://github.com/opensearch-project/sql/pull/4817 + +**Author:** @ishaoxy + +**Created:** 2025-11-18T07:41:45Z + +**State:** MERGED + +**Merged:** 2025-11-19T17:38:40Z + +**Changes:** +199 -6 (11 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Similar with `Stats`, now we add `bucket_nullable` argument for `Eventstats`, to control whether it consider null buckets as a valid group in group-by aggregations. + +### Related Issues +Resolves #4801 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@ishaoxy please resolve the conflicts + + +### @ishaoxy + + +Conflicts are resolved. @LantaoJin + + +### @LantaoJin + + +@ishaoxy need to backport manually + + +--- + +# PR #4814: Support `split` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4814 + +**Author:** @ahkcs + +**Created:** 2025-11-14T22:21:48Z + +**State:** MERGED + +**Merged:** 2025-12-08T21:47:56Z + +**Changes:** +218 -0 (8 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +## **Summary** + +This PR implements the `split` eval function for PPL, enabling users to split strings into multivalue arrays based on a delimiter. 
+ +## **Examples** + +### **Basic split with semicolon** + +```ppl +source=people | eval result = split('a;b;c', ';') +``` + +Result: `['a', 'b', 'c']` + +--- + +### **Split into individual characters (empty delimiter)** + +```ppl +source=people | eval result = split('abcd', '') +``` + +Result: `['a', 'b', 'c', 'd']` + +--- + +### **Multi-character delimiter** + +```ppl +source=people | eval result = split('name::value', '::') +``` + +Result: `['name', 'value']` + +--- + +### **Split field value** + +```ppl +source=people | eval words = split(employer, ' ') +``` + +Splits the `employer` field on spaces. + +--- + + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Can this be reused? https://issues.apache.org/jira/browse/CALCITE-6951 + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +### @ykmr1224 - APPROVED + + +LGTM + + +## Review Comments + + +### @dai-chen on `docs/user/ppl/functions/collection.rst:240` + + +This covers delimiter not found case right? The result should be a original string or an array of original string? + + +### @ahkcs on `docs/user/ppl/functions/collection.rst:240` + + +Yes, if delimiter is not found, the result should be an array of original string + + +## General Comments + + +### @ahkcs + + +> https://issues.apache.org/jira/browse/CALCITE-6951 + +Hi @dai-chen , currently we are using `SPLIT`, which is also supported by Calcite. And we did some special handling for empty delimiter on top of that. 
+ +Here's the documentation for Calcite `SPLIT` function: + +``` + +SPLIT(string [, delimiter ]) + +``` + +Returns the string array of string split at delimiter (if omitted, default is comma). If the string is empty it returns an empty array, otherwise, if the delimiter is empty, it returns an array containing the original string. + + +### @dai-chen + + +> > https://issues.apache.org/jira/browse/CALCITE-6951 +> +> Hi @dai-chen , currently we are using `SPLIT`, which is also supported by Calcite. And we did some special handling for empty delimiter on top of that. +> +> Here's the documentation for Calcite `SPLIT` function: +> +> ``` +> +> SPLIT(string [, delimiter ]) +> ``` +> +> Returns the string array of string split at delimiter (if omitted, default is comma). If the string is empty it returns an empty array, otherwise, if the delimiter is empty, it returns an array containing the original string. + +I see. So the only reason of `SplitFunctionImp` is special handling for `delimiter=""`, right? +Can the extract function below help? + +- Case 1: Delimiter is not empty string, translate split to + +``` +SELECT SPLIT('a;b;c;d', ';'); ++--------------+ +| EXPR$0 | ++--------------+ +| [a, b, c, d] | ++--------------+ +``` + +- Case 2: Delimiter is empty string, translate split to: + +``` +SELECT REGEXP_EXTRACT_ALL('abcd', '.'); ++--------------+ +| EXPR$0 | ++--------------+ +| [a, b, c, d] | ++--------------+ +``` + + +### @ahkcs + + +@dai-chen Thanks for the suggestion! I think it makes sense. 
I have updated the PR to move the implementation to `PPLFuncImpTable` + + +--- + +# PR #4813: Support escaped field names in SPath parsing + +**URL:** https://github.com/opensearch-project/sql/pull/4813 + +**Author:** @Swiddis + +**Created:** 2025-11-14T21:46:55Z + +**State:** MERGED + +**Merged:** 2025-11-18T23:33:30Z + +**Changes:** +72 -8 (9 files) + +**Labels:** `PPL`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Implemented the unquoting as part of #4185 but we decided against the full change. But the unquoting itself is still necessary for e.g. `@timestamp`, and rewriting nontrivial paths to json_extract. This PR implements string-quoted spath paths. + +### Related Issues +Unable to parse literal dots in fields like `{"a.b.c": "value"}` + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - COMMENTED + + +Update doc and doctest to explain this feature. + + +## Review Comments + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java:None` + + +double quoted also supported? "attributes.['cluster.name']" +I think SPL spath only support double quote. 
+ + +### @Swiddis on `ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java:None` + + +Added support for string escapes + relevant testing/docs + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4810: [Backport 2.19-dev] Fix UT failure and Linkchecker failure + +**URL:** https://github.com/opensearch-project/sql/pull/4810 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-14T05:22:38Z + +**State:** MERGED + +**Merged:** 2025-11-14T06:21:13Z + +**Changes:** +6 -6 (2 files) + + +## Description + +Backport 3ca040c3d67b69c47dc52149af5de96a63fa6b77 from #4809. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4809: Fix UT failure and Linkchecker failure + +**URL:** https://github.com/opensearch-project/sql/pull/4809 + +**Author:** @LantaoJin + +**Created:** 2025-11-14T03:49:00Z + +**State:** MERGED + +**Merged:** 2025-11-14T05:22:22Z + +**Changes:** +6 -6 (2 files) + +**Labels:** `infrastructure`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Fix UT failures due to merge conflicts #4793 and #4794 +Fix the linkchecker task failure caused by the unstable jira link. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4808: [Backport 2.19-dev] Fix eval on grouped fields after timechart + +**URL:** https://github.com/opensearch-project/sql/pull/4808 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-14T02:30:19Z + +**State:** MERGED + +**Merged:** 2025-11-14T03:27:13Z + +**Changes:** +93 -6 (2 files) + + +## Description + +Backport 7b30c9bb661a98a8989c0dcbea4bb934c981f3bd from #4758. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4807: [Backport 2.19-dev] Fix function identify problem in converting to sql dialect + +**URL:** https://github.com/opensearch-project/sql/pull/4807 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-14T01:05:35Z + +**State:** MERGED + +**Merged:** 2025-11-14T02:40:29Z + +**Changes:** +196 -177 (32 files) + + +## Description + +Backport 3a91e5c31f879c843415a7301bda871f0a3a20aa from #4793. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4805: Support `mvzip` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4805 + +**Author:** @ahkcs + +**Created:** 2025-11-13T21:21:22Z + +**State:** MERGED + +**Merged:** 2025-12-15T21:07:52Z + +**Changes:** +457 -1 (10 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +## **Description** + +The `mvzip` function combines values from two multivalue fields pairwise with a delimiter. +It stitches together corresponding elements from each field, similar to Python’s `zip()` function. 
+ +The function supports two modes of operation: + +1. **Default delimiter:** + + ```ppl + mvzip(mv_left, mv_right) + ``` + + Combines fields using a comma (`,`) as the default delimiter. + +2. **Custom delimiter:** + + ```ppl + mvzip(mv_left, mv_right, delimiter) + ``` + + Combines fields using the specified delimiter. + +--- + +### **Key Features** + +* **Pairwise combination:** Combines 1st element of left with 1st of right, 2nd with 2nd, etc. +* **Stops at shorter length:** Processing stops at the length of the shorter field (Python `zip()` behavior). +* **Scalar handling:** Treats scalar values as single-element arrays. +* **Null handling:** Returns `null` if either input is `null`. +* **Default delimiter:** Uses comma (`,`) when delimiter is not specified. + +--- + +## **Usage Examples** + +### **Basic Usage with Default Delimiter** + +```ppl +source=people +| eval hosts = array('host1', 'host2'), ports = array(80, 443), nserver = mvzip(hosts, ports) +| fields nserver +# Returns: [host1,80, host2,443] + +source=accounts +| eval result = mvzip(firstname, lastname) +| fields result +# Returns: [Amber,Duke] +``` + +--- + +### **Custom Delimiter** + +```ppl +source=people +| eval arr1 = array('a', 'b', 'c'), arr2 = array('x', 'y', 'z'), result = mvzip(arr1, arr2, '|') +| fields result +# Returns: [a|x, b|y, c|z] + +source=accounts +| eval result = mvzip(firstname, lastname, ' ') +| fields result +# Returns: [Amber Duke] +``` + +--- + +### **Different Length Arrays** + +```ppl +source=people +| eval arr1 = array(1, 2, 3), arr2 = array('a', 'b'), result = mvzip(arr1, arr2) +| fields result +# Returns: [1,a, 2,b] +# Note: Stops at length of shorter array +``` + +--- + +### **Nested mvzip Calls** + +```ppl +source=people +| eval field1 = array('a', 'b'), field2 = array('c', 'd'), field3 = array('e', 'f'), result = mvzip(mvzip(field1, field2, '|'), field3, '|') +| fields result +# Returns: [a|c|e, b|d|f] +``` + +--- + +### **Null Handling** + +```ppl +source=people +| 
eval result = mvzip(nullif(1, 1), array('test')) +| fields result +# Returns: null +``` + + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +QQ: can SQL's `ARRAYS_ZIP` be used for this? + + +## Review Comments + + +### @dai-chen on `docs/user/ppl/functions/collection.rst:None` + + +Add or modify the example for nested `mvzip` with 3 multi values? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipFunctionImpl.java:None` + + +The first 2 argument type must be multiarray/array? If so, no need to accept object in implemention method `mvzip`? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipCore.java:None` + + +So if one or both array is empty, the result is null? + + +### @ahkcs on `docs/user/ppl/functions/collection.rst:None` + + +Added example for nested `mvzip` with 3 arrays + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipFunctionImpl.java:None` + + +Updated the `getOperandMetadata` method to only accept Array for the first 2 arguments + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipCore.java:None` + + +Updated the NullPolicy to be NullPolicy.ANY, also updated the IT tests and UT for cases where one or both array is empty + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipFunctionImpl.java:None` + + +Although string array argument type, Calcite requires `Object` arg here? 
+ + +### @dai-chen on `docs/user/ppl/functions/collection.rst:None` + + +np: `head 1` is because `source=people` is required? Do we have an issue for adding `makeresults` command which would simplify this a lot? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipFunctionImpl.java:None` + + +Hi Chen, Calcite's reflection-based method lookup requires Object types - When Types.lookupMethod is called (lines 82-83, 90-91), it looks up the method signature using Object.class: +``` + Types.lookupMethod( + MVZipFunctionImpl.class, "mvzip", Object.class, Object.class, String.class) +``` + + + +### @ahkcs on `docs/user/ppl/functions/collection.rst:None` + + +Yes, head 1 is needed because source=people returns multiple rows. We have issue #3629 for adding a data generation command that is still open + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVZipFunctionImpl.java:None` + + +Updated to use `List.class` instead + + +### @ahkcs on `docs/user/ppl/functions/collection.rst:None` + + +Updated + + +## General Comments + + +### @ahkcs + + +> QQ: can SQL's `ARRAYS_ZIP` be used for this? + +Thanks for the question! I considered ARRAYS_ZIP but it's not suitable for mvzip due to semantic differences: + 1. Return Type: + - ARRAYS_ZIP → ARRAY (e.g., [STRUCT(0:'a', 1:'x'), STRUCT(0:'b', 1:'y')]) + - mvzip → ARRAY (e.g., ['a|x', 'b|y']) + 2. Delimiter Requirement: + - mvzip requires custom delimiter support for string concatenation + - ARRAYS_ZIP creates structured data without string formatting + + + +### @dai-chen + + +> > QQ: can SQL's `ARRAYS_ZIP` be used for this? +> +> Thanks for the question! I considered ARRAYS_ZIP but it's not suitable for mvzip due to semantic differences: +> +> 1. Return Type: +> - ARRAYS_ZIP → ARRAY (e.g., [STRUCT(0:'a', 1:'x'), STRUCT(0:'b', 1:'y')]) +> - mvzip → ARRAY (e.g., ['a|x', 'b|y']) +> 2. 
Delimiter Requirement: +> - mvzip requires custom delimiter support for string concatenation +> - ARRAYS_ZIP creates structured data without string formatting + +Does the `mvmap` function you're working help here? + + +### @ahkcs + + +> Does the `mvmap` function you're working help here? + +For `mvmap` function, we are using the existing `Transform` eval function for its implementation +https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/collection.rst#transform + +https://github.com/opensearch-project/sql/pull/4856 + + + +### @ykmr1224 + + +Can you resolve conflict, and address comment from CodeRabbit. + + +--- + +# PR #4804: [Backport 2.19-dev] Support `mvindex` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4804 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-13T19:13:02Z + +**State:** MERGED + +**Merged:** 2025-11-14T02:39:24Z + +**Changes:** +434 -0 (9 files) + + +## Description + +Backport dd52196dfc8a0ef03945aa75f5641b0b59c9e87b from #4794. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4803: Doc update for `json_valid` + +**URL:** https://github.com/opensearch-project/sql/pull/4803 + +**Author:** @ahkcs + +**Created:** 2025-11-13T18:43:08Z + +**State:** MERGED + +**Merged:** 2025-11-14T22:18:36Z + +**Changes:** +26 -0 (1 files) + +**Labels:** `documentation`, `backport 2.19-dev` + + +## Description + +### Description +Doc update for `json_valid` + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +Let's add `backport 2.19-dev` label to this PR + + +--- + +# PR #4799: [Backport 2.19-dev] Support using decimal as span literals + +**URL:** https://github.com/opensearch-project/sql/pull/4799 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-13T06:55:58Z + +**State:** MERGED + +**Merged:** 2025-11-14T02:39:41Z + +**Changes:** +103 -26 (8 files) + + +## Description + +Backport a3c90a84730c6398428513e573495c75edc09297 from #4717. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4798: [Backport 2.19-dev] Fix bug that `Streamstats` command incorrectly treats null as a valid group + +**URL:** https://github.com/opensearch-project/sql/pull/4798 + +**Author:** @ishaoxy + +**Created:** 2025-11-13T05:55:40Z + +**State:** MERGED + +**Merged:** 2025-11-13T07:27:07Z + +**Changes:** +313 -194 (10 files) + + +## Description + +### Description +backport #4777 to 2.19-dev + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4796: [Backport 2.19-dev] Merge the implementation of `timechart` and `chart` (#4755) + +**URL:** https://github.com/opensearch-project/sql/pull/4796 + +**Author:** @yuancu + +**Created:** 2025-11-13T02:45:20Z + +**State:** MERGED + +**Merged:** 2025-11-13T03:54:01Z + +**Changes:** +1103 -1429 (31 files) + + +## Description + +### Description +Backport #4755 to 2.19-dev + +(cherry picked from commit daf1795a0672c9f6ce1d1bd74fc58415963d099a) + +### Commit Message + +* Remove visitTimechart + + + +* Migrate per functions to Chart + + + +* Update CalcitePPLTimechartTest + + + +* Migrate TimecharTest to use Chart + + + +* Fix AST relevant tests + + + +* Remove Timechart AST object in favor of Chart + + + +* Update expected plans for timechart + + + +* Update doctest for timechart +- add 2 more indicies for test purpose + + + +* Add yaml tests for 4581, 4582, and 4632 + + + +* Allow flexible parameter positions for chart and timechart + + + +* Simplify CalciteTimechartCommandIT + + + +--------- + +### Related Issues +Resolves #4581, resolves #4582, resolves #4632 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4795: Perform RexNode expression standardization for script push down. 
+ +**URL:** https://github.com/opensearch-project/sql/pull/4795 + +**Author:** @qianheng-aws + +**Created:** 2025-11-13T02:40:14Z + +**State:** MERGED + +**Merged:** 2025-11-19T05:59:39Z + +**Changes:** +800 -410 (82 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR includes changes: +1. Implement the step 1&2(Replace field and literal with parameters) described in the RFC:https://github.com/opensearch-project/sql/issues/4757. This will enhance our script cache to get higher hitting ratio. +2. Remove `ROW_TYPE` and `EXPR_MAP` in our script. Then the average script size can be reduced by ***2 to 5*** times than before. +3. Remove `OpenSearchRequestBuilder` when computing digest for `OpenSearchIndexScanOperator`, while keep it when generating explain plan. +4. Remove `OpenSearchRequestBuilder` in `PushDownContext` and make the related action lazy perform. Since we have change 3, it's less valuable to hold that object in each `PushDownContext`. +5. Tiny enhancement on the parameter of SORT_EXPR, see https://github.com/opensearch-project/sql/pull/4750#discussion_r2532987617 + +### Related Issues +Partly resolves https://github.com/opensearch-project/sql/issues/4757 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - COMMENTED + + +Conflicts exist + + +## Review Comments + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java:12` + + +what is meaning of this error message? TODO? 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java:111` + + +this is not customer facing error, right? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:None` + + +Is there any test cases show decoded pushdown script? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:214` + + +could u add a developer doc to explain the spec of pushdown script, after encoding, is not easy to read I guess. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:129` + + +By design, it is never throw exception, right? +if happened, it will customer facing exception? +if yes, Add [BUG] in error message + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:None` + + +Add UT for this method. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java:111` + + +Yeah, it will caught by `pushDownLimit` and then it will prevent the push down the limit operator then. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:None` + + +See `RelJsonSerializerTest` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java:12` + + +> Remove OpenSearchRequestBuilder in PushDownContext and make the related action lazy perform. Since we have change 3, it's less valuable to hold that object in each PushDownContext. + +I made the transformation of each push down operation lazy except `operationsForAgg`. This comment explains why we have this exception of not making it lazy. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:129` + + +Yeah, it should be a bug if this exception happens. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:None` + + +We have test `RelJsonSerializerTest` which will cover this actually. That tests verify the whole process `standardizeRexNodeExpression -> encoding -> decoding` while the `encoding -> decoding` could be ignored since we just invoked a Base64 encoder/decoder. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java:111` + + +Will add [INTERNAL] tag in the error message + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:214` + + +Added in https://github.com/opensearch-project/sql/pull/4795/commits/12780545aecec0b076e7172d49523f42c82e91c0 + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:None` + + +Could we make it a singleton instance? Maybe leverage `RexBiVisitorImpl`? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:214` + + +Thanks. It is very clear. + +Nit: Is Literals array necessary? DIGESTS and LITERALS can combined? +``` + "params": { + "utcTimestamp": 17630261838681530000, + "SOURCES": [0, 2, 2, 1], + "DIGESTS": ["age", 0, 1, "email"], + "LITERALS": [35, "u35"] + } +``` +vs +``` + "params": { + "utcTimestamp": 17630261838681530000, + "SOURCES": [0, 2, 2, 1], + "DIGESTS": ["age", 35, "u35", "email"] + } +``` + + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +why include `sources, digests, literals` as paramater in serialize() function and client create empty array? 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +I see. It is required when create Script on L1504. +I also found sources, digests, literals exposed been used in multiple place without encapsulation. e.g. ScriptDataContext and standardizeRexNodeExpression. + +can we encapsulate our [script protocol](https://github.com/opensearch-project/sql/blob/12780545aecec0b076e7172d49523f42c82e91c0/docs/dev/intro-scripts.md) in a class? e.g. +``` +class ParameterBindings { + void putValue(String name, Object value) + Object getValue(String name) +} +``` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:214` + + +Agree, it could work. I split it for 2 reasons: +1. keep alignment concept, in which we view `LITERALS` as a kind of source and retrieve actual value via `DIGEST`, just like looking up `DOC_VALUE` or `SOURCE`. + +2. in case there is duplicated literal value and `LITERALS` can reduce them, e.g. +``` + "params": { + "utcTimestamp": 17630261838681530000, + "SOURCES": [0, 2, 2, 2], + "DIGESTS": ["age", 0, 0, 0], + "LITERALS": [""] + } +``` +Though in most cases, it shouldn't have much benefit. I only found 1 case in our IT hit this, like `case(age < 30, 30 else 100)` will reuse literal 30. So I don't have strongly incline on the current way. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +nit question: Is it a right import? + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +Not relevant to this PR. It would be nice to be lazy as well. Does the exception come from script generation or else? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +Agree. 
Our push down operations after aggregation have strongly dependance on the exception throwing mechanism, so it can prevent such push down easily if exception happens. + +We'd better extract all cases which is actually not our targets out of the transformation lambda function. But they are too many and too complex to refactor at once in this PR. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +Thanks! Will remove + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java:369` + + +One more thing that comes up in my mind, how about the sorting by ExprIpValue? It was one of blocker to prevent last change made by yuanchuan to directly convert IP to string. cc @yuancu + +Another discussion I remember is whether IP sorting is necessary. Forgot the result of discussion. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java:369` + + +Luckily seems we don't have any function/script return ExprIPValue for now, so I think at least it won't block any script push down. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:214` + + +looks good. +Can ScriptParameterHelper been used to hide protocol? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:185` + + +Can ScriptParameterHelper been used to hide protocol? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:185` + + +Do you mean put `ScriptParameterHelper` object in the script parameter directly? I remember the opensearch request will be formatted to json by using `XContentSerializer` which cannot serialize self defined class. That's why I convert `ScriptParameterHelper` into a map when constructing request. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:185` + + +By my local test, it will throw exception if putting ScriptParameterHelper in request directly. +``` +Caused by: java.lang.IllegalArgumentException: cannot write xcontent for unknown value of type class org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper +``` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:80` + + +Just curious: is there any benefit of retrieving from doc values over from source? + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java:369` + + +I think this looks good for now + + +## General Comments + + +### @qianheng-aws + + +The latest change: https://github.com/opensearch-project/sql/pull/4795/commits/f74600ff6c4f002b5e018b0cece2dcc162b6056d includes: +1. an enhancement on the parameter from SORT_EXPR: https://github.com/opensearch-project/sql/pull/4750#discussion_r2532987617 +2. Apply comments: https://github.com/opensearch-project/sql/pull/4795#discussion_r2528384908 + + +--- + +# PR #4794: Support `mvindex` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4794 + +**Author:** @ahkcs + +**Created:** 2025-11-12T20:02:05Z + +**State:** MERGED + +**Merged:** 2025-11-13T19:12:48Z + +**Changes:** +434 -0 (9 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +--- + +## **Description** + +The `mvindex` function returns a subset of a multivalue array using start and optional end index values. +It supports two modes of operation: + +1. **Single element access:** + `mvindex(array, index)` — Returns the element at the specified index +2. 
**Range access:** + `mvindex(array, start, end)` — Returns an array slice from start to end *(inclusive)* + +--- + +### **Key Features** + +* **0-based indexing:** First element is at index `0` (PPL convention) +* **Negative indexing:** `-1` refers to the last element, `-2` to the second-to-last, etc. +* **Inclusive end index:** Range `mvindex(array, 1, 3)` returns elements at indices `1`, `2`, and `3` + +--- + +## **Usage Examples** + +### **Single Element Access** + +```ppl +source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, 1) | fields result +# Returns: b +``` + +```ppl +source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, -1) | fields result +# Returns: e +``` + +--- + +### **Range Access** + +```ppl +source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, 1, 3) | fields result +# Returns: [2, 3, 4] +``` + +```ppl +source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, -3, -1) | fields result +# Returns: [3, 4, 5] +``` + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +lgtm + + +### @ykmr1224 - APPROVED + + +LGTM + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVIndexFunctionImp.java:55` + + +Is the `args.length < 2` case worth handling? 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVIndexFunctionImp.java:55` + + +The minimum number of arguments is 2, which is when we use `mvindex(array, index)` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4793: Fix function identify problem in converting to sql dialect + +**URL:** https://github.com/opensearch-project/sql/pull/4793 + +**Author:** @LantaoJin + +**Created:** 2025-11-12T11:00:42Z + +**State:** MERGED + +**Merged:** 2025-11-14T01:05:19Z + +**Changes:** +196 -177 (32 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Fix function identify problem in converting to sql dialect: +- change min (scalar function) to scalar_min in plan level +- change max (scalar function) to scalar_max in plan level +- remove the UDF backtick in converting to sql dialect + +### Related Issues +Resolves #4774 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +cc @dai-chen + + +--- + +# PR #4792: [Backport 2.19-dev] Translate `SAFE_CAST` to `TRY_CAST` in Spark SQL + +**URL:** https://github.com/opensearch-project/sql/pull/4792 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-12T10:23:23Z + +**State:** MERGED + +**Merged:** 2025-11-12T13:21:06Z + +**Changes:** +75 -78 (7 files) + + +## Description + +Backport b5fe1c115e32986001a9a87360986614006bdab2 from #4788. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4791: [Backport 2.19-dev] Add `regexp_replace()` function as alias of `replace()` (#4765) + +**URL:** https://github.com/opensearch-project/sql/pull/4791 + +**Author:** @LantaoJin + +**Created:** 2025-11-12T09:17:56Z + +**State:** MERGED + +**Merged:** 2025-11-12T10:23:50Z + +**Changes:** +237 -85 (22 files) + + +## Description + +(cherry picked from #4765 commit e8e9a5bad429a588746f440095f7fc311120f006) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4788: Translate `SAFE_CAST` to `TRY_CAST` in Spark SQL + +**URL:** https://github.com/opensearch-project/sql/pull/4788 + +**Author:** @LantaoJin + +**Created:** 2025-11-12T07:09:39Z + +**State:** MERGED + +**Merged:** 2025-11-12T10:23:07Z + +**Changes:** +75 -78 (7 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +- Translate `SAFE_CAST` to `TRY_CAST` in Spark SQL +- Fix the bug of translation `MAX_BY` and `MIN_BY` functions in Spark SQL + +### Related Issues +Resolves #4778 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java:30` + + +[nit] `ARGUMENT_SEPARATOR` might be a better name + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4787: [Backport 2.19-dev] Support appendpipe command in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4787 + +**Author:** @xinyual + +**Created:** 2025-11-12T06:56:33Z + +**State:** MERGED + +**Merged:** 2025-11-12T09:13:02Z + +**Changes:** +408 -1 (17 files) + + +## Description + +### Description +Backport #4602 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4784: Specify timestamp field with `timefield` in timechart command + +**URL:** https://github.com/opensearch-project/sql/pull/4784 + +**Author:** @yuancu + +**Created:** 2025-11-12T06:05:16Z + +**State:** MERGED + +**Merged:** 2025-11-26T06:44:40Z + +**Changes:** +166 -145 (13 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Allow users to specify a timestamp field instead of assuming there exists a `@timestamp` field when using `timechart` command. 
+ +For example, the following query is made possible: +```bash +source=events | timechart timefield=start_at span=1hour by category +``` + + +### Related Issues +Resolves #4576 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +## Review Comments + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:778` + + +Could you check if we need to use this specified time field in `transformPerFunction` for per functions as well? + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:778` + + +Yes, I think we need to use the specified `timefield` in per functions because otherwise they'll refer to the non-existing `@timestamp` field. I added an integration test for this case. + + +## General Comments + + +### @LantaoJin + + +non-blocking: @yuancu looks like this PR would impact the reverse enhancement https://github.com/opensearch-project/sql/pull/4775 more or less. + + +### @yuancu + + +> non-blocking: @yuancu looks like this PR would impact the reverse enhancement #4775 more or less. + +Yes it will. If this PR gets merged first, I'll help @ahkcs replace time field reference from hard-coded ones to the timefield variable. 
+ + +--- + +# PR #4783: Fix search anonymizer only + +**URL:** https://github.com/opensearch-project/sql/pull/4783 + +**Author:** @xinyual + +**Created:** 2025-11-12T02:17:14Z + +**State:** MERGED + +**Merged:** 2025-11-21T01:42:00Z + +**Changes:** +134 -19 (15 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +This PR fixes a bug in how `PPLQueryDataAnonymizer` handles the search command. + +### Related Issues +Resolves #4290 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +Can you include more test cases to cover group, in, not, and or? + + +### @yuancu on `core/src/test/java/org/opensearch/sql/ast/tree/SearchTest.java:None` + + +Maybe it's better to create two constructors to avoid assigning null for use cases that do not need original `SearchExpression`: one with child and query string, another with child, query string, and original search expression + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/tree/Search.java:None` + + +I think it can be `@Nullable`, with comments like `original expression is only used for anonymizer tests` + + +### @xinyual on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +Already add one. + + +### @xinyual on `core/src/test/java/org/opensearch/sql/ast/tree/SearchTest.java:None` + + +Already add one. + + +### @xinyual on `core/src/main/java/org/opensearch/sql/ast/tree/Search.java:None` + + +Already add. 
+ + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +It's odd that the `=` anonymized to `>=` and `<=`. It changes the semantic IMO. + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +Change to such as `time_identifier`? +For meta fields such as `_id`, `_doc` etc, how about anonymize to `meta_identifier`? + + +### @xinyual on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +Already add `time_identifier` with `meta_identifier`. Please check it. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4781: [Backport 2.19-dev] Update github workflows to move from macos-13 to 14 (#4779) + +**URL:** https://github.com/opensearch-project/sql/pull/4781 + +**Author:** @ykmr1224 + +**Created:** 2025-11-11T22:31:04Z + +**State:** MERGED + +**Merged:** 2025-11-11T23:15:22Z + +**Changes:** +4 -4 (2 files) + + +## Description + +backport https://github.com/opensearch-project/sql/commit/2057dfc24b5809e25c593710e310f9e40d7cf8ab from #4779 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4780: [Backport 2.19-dev] Fix binning udf resolution / Add type coercion support for binning UDFs + +**URL:** https://github.com/opensearch-project/sql/pull/4780 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-11T22:24:31Z + +**State:** MERGED + +**Merged:** 2025-11-11T23:56:31Z + +**Changes:** +500 -288 (22 files) + + +## Description + +Backport 20f2234002b1a3c29cd6aa7ecba30a4dc047870a from #4742. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4779: Update github workflows to move from macos-13 to 14 + +**URL:** https://github.com/opensearch-project/sql/pull/4779 + +**Author:** @ykmr1224 + +**Created:** 2025-11-11T21:59:23Z + +**State:** MERGED + +**Merged:** 2025-11-11T22:17:23Z + +**Changes:** +3 -3 (2 files) + +**Labels:** `infrastructure`, `backport 2.19-dev` + + +## Description + +### Description +- Update github workflows to move from macos-13 to 14 +- Reference: https://github.com/actions/runner-images/issues/13046 + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4777: Fix bug that `Streamstats` command incorrectly treats null as a valid group + +**URL:** https://github.com/opensearch-project/sql/pull/4777 + +**Author:** @ishaoxy + +**Created:** 2025-11-11T07:20:21Z + +**State:** MERGED + +**Merged:** 2025-11-13T02:55:09Z + +**Changes:** +310 -194 (10 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description + +1. Added a `groupNotNull` predicate + +2. 
Wrapped each window expression in a conditional form: + +`CASE WHEN groupNotNull THEN raw_expr ELSE CAST(NULL AS expr_type) END` + +### Related Issues +* Resolve #4751 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Hi @ishaoxy , thanks for taking this on. I just left some comments. In addition, for the testing, I saw we have covered most of the 'happy' cases, consider adding more negative cases (e.g. “What happens when `GROUP BY col1, col2` and only one column is null?”). + + +### @RyanL1997 - APPROVED + + +@ishaoxy LGTM thanks for the change. + + +## Review Comments + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1692` + + +Potential NPE: Add null check before calling `lit.getValueAs(String.class)`. The literal value could be null. + +```java + if (rc.getOperands().size() >= 2 && rc.getOperands().get(1) instanceof RexLiteral lit + && lit.getValue() != null) { + aliasName = lit.getValueAs(String.class); + } +``` + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:745` + + +nice :) + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +The same `overExpressions` calculation appears twice. 
Consider extracting this to a variable before the if-else block: + +```java +List overExpressions = node.getWindowFunctionList().stream() + .map(w -> rexVisitor.analyze(w, context)).toList(); +``` + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Just for my understanding: when `notNullList.isEmpty()`, returning `literal(true)` means all rows pass the null check. Is this the intended behavior when there are no grouping fields? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1692` + + +`null instanceOf Class` always return false. So no need to check again. + + +### @ishaoxy on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:745` + + +:) + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +fixed. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1648` + + +I'm a little curious why is this patch only applied to the default case. How about case 2: `node.isGlobal() && hasWindow && hasGroup`, why won't the current implementation regard two rows with `null` grouping fields as the same group? The results seem correct, but I haven't figured out the trick. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +This code now has been modified; sorry for this redundant logic. Because in this branch always hasGroup==true, notNullList will never be empty, and it will get result like `[state IS NOT NULL, country IS NOT NULL]` based on the group field. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1648` + + +Because `global` path grouping uses `return context.relBuilder.equals(rightGroup, outerGroup);` to achieve grouping, `null == null` will not return true, so `null` value will not be treated as a group. 
However, the `default` path uses SQL's native partition semantics. In the future, we may add a button to support both these behaviors. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:None` + + +please change the code (and all others) to +``` +... +client().performRequest(insertRequest); +try { +... +} finally { + Request deleteRequest = + new Request( + "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); + client().performRequest(deleteRequest); +} +``` + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:657` + + +can you add a test for complex expression: +``` +source=%s +| eval new_state=lower(state), new_country=lower(country) +| streamstats avg(age) as avg_age by new_state, new_country +``` + + +### @ishaoxy on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:657` + + +added. + + +### @ishaoxy on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:None` + + +fixed. + + +## General Comments + + +### @RyanL1997 + + +nit: I noticed that there were multiple empty space line deletion in the diff - consider revert these changes. + + +### @ishaoxy + + +> Hi @ishaoxy , thanks for taking this on. I just left some comments. In addition, for the testing, I saw we have covered most of the 'happy' cases, consider adding more negative cases (e.g. “What happens when `GROUP BY col1, col2` and only one column is null?”). + +Thank you for the suggestion! I have added such negative test. 
+ + +--- + +# PR #4773: [Maintenance] Enforce PR label of 'bugFix' instead of 'bug' + +**URL:** https://github.com/opensearch-project/sql/pull/4773 + +**Author:** @RyanL1997 + +**Created:** 2025-11-10T20:17:51Z + +**State:** MERGED + +**Merged:** 2025-11-10T23:04:11Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +[Maintenance] Enforce PR label of 'bugFix' instead of 'bug' + + +### Testing Screenshot + +Screenshot 2025-11-10 at 12 21 26 PM + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Could you double check if `bugfix` label can be recognized by release notes workflow? Ref: https://github.com/opensearch-project/sql/blob/main/release-notes/opensearch-sql.release-notes-3.3.0.0.md#bug-fixes. Is this label only in our plugin or used in other repo too? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4772: [Backport 2.19-dev] Adding IT suite for PPL-based dashboards in Neo for CloudWatch Lake + +**URL:** https://github.com/opensearch-project/sql/pull/4772 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-10T19:37:29Z + +**State:** MERGED + +**Merged:** 2025-11-11T01:23:59Z + +**Changes:** +4167 -0 (17 files) + + +## Description + +Backport 74d67dae0618a51de23751a314cd052fd326e20c from #4695. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4770: Add workflow for SQL CLI integration tests + +**URL:** https://github.com/opensearch-project/sql/pull/4770 + +**Author:** @Swiddis + +**Created:** 2025-11-10T19:10:32Z + +**State:** MERGED + +**Merged:** 2025-12-09T00:02:36Z + +**Changes:** +96 -0 (1 files) + +**Labels:** `infrastructure`, `stalled` + + +## Description + +### Description +Companion to https://github.com/opensearch-project/sql-cli/pull/42: Runs some tests & checks against basic compilation breakage for the SQL CLI. + +The CLI tests [aren't very robust](https://github.com/opensearch-project/sql-cli/blob/main/src/test/java/GatewayTest.java). For better testing, I understand the plan is to wait for the PPL unification that @dai-chen is working on and write a suite around it. + +At some point I'll need to make the `_shard_doc` changes configurable and start testing on a compatibility matrix, but this at least should keep the live branches of each repo in sync for now. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Could you give an example what breaking change this PR try to capture? What should we do if this failed, e.g., revert my changes or go to CLI to change code accordingly? Is this required because CLI allow user to fetch dev branch like `main`? 
+ + +## Review Comments + + +### @ps48 on `.github/workflows/sql-cli-integration-test.yml:None` + + +Just curious why do we need these paths, why shouldn't we run this for all updates (except docs)? can path-ignore be shorter [here](https://docs.github.com/en/actions/reference/workflows-and-actions/workflow-syntax#onpushpull_requestpull_request_targetpathspaths-ignore) ? + + +### @Swiddis on `.github/workflows/sql-cli-integration-test.yml:None` + + +Just nice to have if we only match what we need, I updated these to match what we have for `sql-test-and-build-workflow` instead. + + +## General Comments + + +### @Swiddis + + +CLI tests passed + + +### @Swiddis + + +e.g. the forceCleanup method being added to RestClient's interface: https://github.com/opensearch-project/sql-cli/pull/39/files#diff-2536e76d9e0730c06e4fd3954ee536137427d864e87357d9bf722c92740306b6R324 + + + +### @LantaoJin + + +CI is broken after this PR merged @Swiddis + + +--- + +# PR #4767: [Backport 2.19-dev] Support push down sort on aggregation measure for more than one agg calls (#4759) + +**URL:** https://github.com/opensearch-project/sql/pull/4767 + +**Author:** @LantaoJin + +**Created:** 2025-11-10T06:29:16Z + +**State:** MERGED + +**Merged:** 2025-11-10T08:28:57Z + +**Changes:** +265 -148 (31 files) + + +## Description + +(cherry picked from #4759 commit 3d548b9b66652cb24bd705f5074b77ec4ecdcaa3) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4765: Add `regexp_replace()` function as alias of `replace()` + +**URL:** https://github.com/opensearch-project/sql/pull/4765 + +**Author:** @LantaoJin + +**Created:** 2025-11-08T10:49:23Z + +**State:** MERGED + +**Merged:** 2025-11-12T07:21:48Z + +**Changes:** +237 -83 (22 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `clickbench` + + +## Description 
+ +### Description +- Add `regexp_replace()` function as alias of `replace()` +- Rename `regex_match()` to `regexp_match()`, and keep the `regex_match()` as synonyms of `regexp_match()`. +- Change the implementation of `PATTERNS` method to `replace()` from `INTERNAL_REGEXP_REPLACE_3`. + +### Related Issues +Resolves #4764 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Hi @LantaoJin , thanks for the change. It lgtm in general, and I just left some minor comments? + + +## Review Comments + + +### @RyanL1997 on `docs/user/ppl/functions/condition.rst:None` + + +I checked the rendered file and it seems like this link does not work. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java:333` + + +just for my knowledge: This functionality is being consolidated rather than removed right? - The 3-parameter `regexp` replace functionality is being handled by the existing `REPLACE` function now. + + +### @RyanL1997 on `integ-test/src/test/resources/clickbench/queries/q29.ppl:11` + + +Any reason we need to double escaping now? + + +### @LantaoJin on `docs/user/ppl/functions/condition.rst:None` + + +Okey, there is no link for `REGEX_MATCH` anymore, the `REGEX_MATCH` should be considered as a typo name. I will remove it. + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q29.ppl:11` + + +That is because the query is added to json entity of restful request. Char `\` is invalid without escaping in json body. 
+https://github.com/opensearch-project/sql/blob/20f2234002b1a3c29cd6aa7ecba30a4dc047870a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java#L133 + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java:333` + + +`INTERNAL_REGEXP_REPLACE_3` is not a public UDF. It is used in `patterns` and `rex` command. So I removed it from `BuiltinFunctionName.java` because `patterns` and `rex` command should be translated to `replace`, an enhanced `REGEXP_REPLACE_3` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4759: Support push down sort on aggregation measure for more than one agg call + +**URL:** https://github.com/opensearch-project/sql/pull/4759 + +**Author:** @LantaoJin + +**Created:** 2025-11-07T13:06:57Z + +**State:** MERGED + +**Merged:** 2025-11-10T05:47:04Z + +**Changes:** +254 -146 (31 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +1. Support push down sort on aggregation measure for more than one agg calls +2. Refactor `AggPushDownAction` to remove deduped codes +3. Create `OpenSearchRelBuilder` in RelRule.Config + +### Related Issues +Resolves #4737 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4758: Fix eval on grouped fields after timechart + +**URL:** https://github.com/opensearch-project/sql/pull/4758 + +**Author:** @yuancu + +**Created:** 2025-11-07T10:22:00Z + +**State:** MERGED + +**Merged:** 2025-11-14T02:30:03Z + +**Changes:** +93 -6 (2 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Reduce RelCompositeTrait of RelCollation to a single collation when creating LogicalSystemLimit + +### Related Issues +Resolves #4644 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4756: [Backport 2.19-dev] Support `chart` command in PPL (#4579) + +**URL:** https://github.com/opensearch-project/sql/pull/4756 + +**Author:** @yuancu + +**Created:** 2025-11-07T09:12:27Z + +**State:** MERGED + +**Merged:** 2025-11-10T05:25:54Z + +**Changes:** +2347 -180 (44 files) + + +## Description + +### Description + +Backport #4579 to 2.19-dev + +### Commit Message + +* WIP: Make poc implementation for chart command + + + +* Support param useother and otherstr + + + +* Support usenull and nullstr (when both row split and col split present) + + + +* Append a final aggregation to merge OTHER categories + + + +* Handle common agg functions for OTHER category for timechart + + + +* Fix timechart IT + + + +* Sort earliest results with asc order + + + +* Support non-string fields as column split + + + +* Fix min/earliest order & fix non-accumulative agg for chart + + + +* Hint non-null in aggregateWithTrimming + + + +* Add integration tests for chart command + + + +* Add unit tests + + + +* Add doc for chart command + + + +* Prompt users that multiple agg is not supported + + + +* Add explain ITs + + + +* Remove unimplemented support for multiple aggregations in chart command + + + +* Add unit tests for chart command + + + +* Remove irrelevant yaml test + + + +* Tweak chart.rst + + + +* Swap the order of chart output to ensure metrics come last + + + +* Filter rows without col split when calculate grand total + + + +* Chores: tweak code order + + + +* Add anonymize test to chart command + + + +* Change grammart from limit=top 10 to limit=top10 + + + +* Update chart doc + + + +* Rename __row_number__ for chart to _row_number_chart_ + + + +* Sort by row and col splits on top of chart results + + + +* Ignore rows without a row split in chart command + + + +* Keep 
categories with max summed values when top k is set + + + +* Simplify toAddHintsOnAggregate condition + + + +* Chores: eliminate unnecessary variables + + + +* Apply a non-null filter on fields referred by aggregations + + + +* Fix chart plans + + + +* Get rid of record class + + + +* Move ranking by column split to a helper function + + + +--------- + + +(cherry picked from commit 55239323cac124df414ad1cee319f0b0f33a4513) + + +### Related Issues +Resolves #399 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4755: Merge the implementation of `timechart` and `chart` + +**URL:** https://github.com/opensearch-project/sql/pull/4755 + +**Author:** @yuancu + +**Created:** 2025-11-07T08:58:44Z + +**State:** MERGED + +**Merged:** 2025-11-12T10:42:22Z + +**Changes:** +1096 -1422 (31 files) + +**Labels:** `bug`, `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +`timechart` is semantically a subset of `chart` , where the row-split is always fixed to `@timestamp`. This PR merges their implementation for easier maintenance and resolves a few existing bugs of `timechart`. + +### Related Issues +Resolves #4581, resolves #4582, resolves #4632 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `docs/user/ppl/cmd/timechart.rst:175` + + +`cpu_usage` does not exist in the given index, therefore replaced + + +### @yuancu on `docs/user/ppl/cmd/timechart.rst:213` + + +`region` does not exist in the given index, therefore replaced + + +### @LantaoJin on `docs/user/ppl/cmd/timechart.rst:163` + + +Why the results before and after for the same ppl query are different? please confirm the current result is correct comparing to SPL. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java:311` + + +why the location of option `nullstr` changed? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java:308` + + +ditto: cannot `timechart span=1d limit=2 count() by host` work now? can it work in spl? + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java:311` + + +@penghuo commented that it would be better if the argument positions are more flexible in https://github.com/opensearch-project/sql/pull/4579#discussion_r2496294627 + +Here, I altered the location of argument `nullstr` to exemplify this point + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java:308` + + +It works. The locations of these arguments are scattered in different locations across the ITs. + + +### @yuancu on `docs/user/ppl/cmd/timechart.rst:163` + + +The difference lies in whether we fill `0` for non-existing groups. 
+ +In SPL, `timechart` and `chart` will pivot the result table, explicitly filling 0 for `count` aggregation, leaving it empty for the rest aggregation functions. E.g. the result in SPL will be like: + + | `@timestamp` | `server1` | `server2` | + |---------------------|---------|---------| + | 2023-01-01 10:00:00 | 1 | 0 | + | 2023-01-01 10:05:00 | 0 | 1 | + | 2023-01-01 10:10:00 | 1 | 0 | + | 2023-01-01 10:15:00 | 0 | 1 | + | 2023-01-01 10:20:00 | 1 | 0 | + | 2023-01-01 10:25:00 | 0 | 1 | + | 2023-01-01 10:30:00 | 1 | 0 | + | 2023-01-01 10:35:00 | 0 | 1 | + + +In [this thread](https://github.com/opensearch-project/sql/pull/4579#discussion_r2478923136) and offline discussions, we decided to not fill `0` for non-existing groups to simplify implementations and keep consistence with docs (see #4632), leaving the pivoting and zero-filling to frontend if necessary. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4754: Feature addtotals and addcoltotals + +**URL:** https://github.com/opensearch-project/sql/pull/4754 + +**Author:** @asifabashar + +**Created:** 2025-11-07T07:59:58Z + +**State:** MERGED + +**Merged:** 2025-12-15T18:27:53Z + +**Changes:** +2301 -1 (25 files) + +**Labels:** `PPL`, `feature`, `backport 2.19-dev` + + +## Description + +### Description +Is your feature request related to a problem? +addtotals command to show total of all columns of each row as a new column , and also have option to show total of all rows of each column values to show at the end of rows. +Fixes issue #4607 +From roadmap https://github.com/opensearch-project/sql/issues/4287 + +addcoltotals command to show total of each column's all rows values to show at the end of rows. +From roadmap https://github.com/opensearch-project/sql/issues/4287 + +What solution would you like? 
+command: addtotals, addcoltotals +addtotals: Add totals across rows by default and also calculate total across columns when col=true +The addtotals command adds together the numeric fields in each search result. + +You may specify which fields to include rather than summing all numeric fields. +The final total is stored in a new field. + +The addtotals command's behavior is as follows: + +When col=true, it computes the sum for every column and adds a summary row at the end containing those totals. + +To label this final summary row, specify a labelfield and assign it a value using the label option. + + +Alternatively, instead of using the addtotals col=true command, you can use the addcoltotals command to calculate a summary event. + +labelfield, if specified, is a field that will be added at the last row of the column specified by labelfield with the value set by the 'label' option. + +Command Syntax: +`addtotals [row=<bool>] [col=<bool>] [labelfield=<field>] [label=<string>] [fieldname=<field>] [<field-list>]` +arguments description: +**row**: Syntax: `row=<bool>` . Indicates whether to compute the sum of the `<field-list>` for each event. This works like generating a total for each row in a table. The result is stored in a new field, which is named Total by default. To use a different field name, provide the fieldname argument. Default value is `true`. + + +**col** : Syntax: `col=<bool>` . Indicates whether to append a new event—called a summary event—to the end of the event list. This summary event shows the total for each field across all events, similar to calculating column totals in a table. Default is false. + + +**fieldname** : Syntax: `fieldname=<field>` . Specifies the name of the field that stores the calculated sum of the field-list for each event. This argument is only applicable when row=true. Default is `Total` + +**field-list** : `Syntax: <field> ...` . One or more numeric fields separated by spaces. Only the fields listed in the `<field-list>` are included in the sum. If no `<field-list>` is provided, all numeric fields are summed by default. 
+ +**labelfield** : Syntax: `labelfield=<field>` . Specifies a field to use as the label for the summary event. This argument is only applicable when col=true. + +To use an existing field from your result set, provide its name as the value for the labelfield argument. For example, if the field is named salary, specify labelfield=salary. If no existing field matches the labelfield value, a new field is created using that value. + +**label**: Syntax: `label=<string>`. Specifies a row label for the summary event. + +If the labelfield argument refers to an existing field in the result set, the label value appears in that field for the summary row. + +If the labelfield argument creates a new field, the label is placed in that new field in the summary event row. Default label is `Total`. + + +**command addcoltotals:** Computes the total of each column across all rows and appends the totals as a summary row at the end of the results. + +addcoltotals: options +Optional Arguments +**`<field-list>`** +Syntax: `<field> ...` . A space-delimited list of valid field names. addcoltotals calculates sums only for the fields you include in this list. By default, the command calculates the sum for all fields. + +**labelfield**: Syntax: `labelfield=<field>`. Field name to add to the result set. + +**label** : Syntax: `label=<string>` . Used together with the labelfield argument to add a label to the summary event. If labelfield is not specified, the label argument has no effect. Default label is `Total`. + +### Related Issues +Resolves #4607 [#4607 ] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @LantaoJin - COMMENTED + + +Following required items for new command is missing: +- Explain tests: + - Add tests to ExplainIT or CalciteExplainIT +- Unsupported in v2 test: + - Add a test in NewAddedCommandsIT +- Cross-cluster Tests (optional, nice to have): + - Add a test in CrossClusterSearchIT + +[ref](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) + + +## Review Comments + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +in SPL addtotal comamnd fieldname parameter is used, some existing unit tests are failing after adding fieldname as antlr reserved keyword , changed those existing tests here. + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +in SPL addtotal comamnd fieldname parameter is used, some existing unit tests are failing after adding fieldname as antlr reserved keyword , changed those existing tests here. + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:None` + + +in SPL addtotal comamnd fieldname parameter is used, some existing unit tests are failing after adding fieldname as antlr reserved keyword , changed those existing tests here. + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:588` + + +Seems the `addtotalsOption` only can appear after `fieldList`. +Why not +``` +ADDTOTALS addtotalsOption* (fieldList)? +``` + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:588` + + +please update `keywordsCanBeId`, or else the query `source = test | fields row` will failed if a field name is `col` in index test. 
+ + +### @asifabashar on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:2005` + + +added closing brace + + +### @asifabashar on `ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java:801` + + +Column Level should not need anonymization as it's not actual data + + +### @asifabashar on `integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java:203` + + +all other existing tests follow the same pattern + + +## General Comments + + +### @asifabashar + + +@penghuo Please review + + +### @asifabashar + + +@penghuo please review + + +### @asifabashar + + +@LantaoJin Please review + + +### @asifabashar + + +Hi maintainers, please review. + + +### @asifabashar + + +> Following required items for new command is missing: +> +> * Explain tests: +> +> * Add tests to ExplainIT or CalciteExplainIT +> * Unsupported in v2 test: +> +> * Add a test in NewAddedCommandsIT +> * Cross-cluster Tests (optional, nice to have): +> +> * Add a test in CrossClusterSearchIT +> +> [ref](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) + +Hi @LantaoJin . Thanks for your review. I have added all missing items. +Please Review. + + +### @LantaoJin + + +cc @anasalkouz + + +### @asifabashar + + +@LantaoJin after resolving conflict, the review mark has been reset + + +### @asifabashar + + +@anasalkouz Please help review as 2 reviewers are needed. + + + + +### @asifabashar + + +@kylehounslow please review. Current CI failures are unrelated. + + + +### @asifabashar + + +@dai-chen I have applied the recommended changes, Please review. 
Please check the use case https://github.com/opensearch-project/sql/pull/4754#discussion_r2612481532 + + +--- + +# PR #4753: Coerce any to specific types for dynamic fields + +**URL:** https://github.com/opensearch-project/sql/pull/4753 + +**Author:** @ykmr1224 + +**Created:** 2025-11-06T21:32:13Z + +**State:** MERGED + +**Merged:** 2025-11-06T23:30:01Z + +**Changes:** +111 -55 (4 files) + +**Labels:** `enhancement`, `PPL`, `calcite` + +**Assignees:** @ykmr1224 + + +## Description + +### Description +- Coerce any to specific types +- This is for dynamic fields to work without explicit cast. + +### Related Issues +Permissive mode RFC: https://github.com/opensearch-project/sql/issues/4349 +Dynamic fields RFC: https://github.com/opensearch-project/sql/issues/4433 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4752: [Backport 2.19-dev] Support script pushdown in sort-on-measure pushdown rewriting (#4749) + +**URL:** https://github.com/opensearch-project/sql/pull/4752 + +**Author:** @LantaoJin + +**Created:** 2025-11-06T08:38:54Z + +**State:** MERGED + +**Merged:** 2025-11-06T10:05:07Z + +**Changes:** +452 -50 (27 files) + + +## Description + +(cherry picked from #4749 commit f4832470cbf8bd10f6c7eb8a2adbf0c60b15b603) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4750: Pushdown sort by complex expressions to scan + +**URL:** https://github.com/opensearch-project/sql/pull/4750 + +**Author:** @songkant-aws + +**Created:** 2025-11-06T06:26:14Z + +**State:** MERGED + +**Merged:** 2025-11-18T06:04:34Z + +**Changes:** +1820 -41 (39 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Add sort performance enhancement of pushing down sort by complex expressions to scan. + +### Related Issues +Resolves #3912 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +non-blocking for this PR: +can you test the sort pushdown on join condition such `on a.id + 1 = b.id +2`? 
+ + +## Review Comments + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java:None` + + +Need to add sort order and null direction here. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java:None` + + +Need to add sort order here and check if script sort supports null direction + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:545` + + +javadoc + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:874` + + +Add more ITs: +``` +eval age2 = age + balance | fields age, age2 | sort age2 +eval age2 = age + balance, age3 = age2 - age, | sort age3 +``` + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java:None` + + +why not add all ITs in SortCommandIT.java + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java:None` + + +no necessary + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java:None` + + +can you add some IT and ExplainIT for this case? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:None` + + +javadoc: what's the difference with `CalciteLogicalIndexScan.pushDownLimit`? It confused we have both methods here. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExpressionInfo.java:None` + + +can you rename this class to `SortExprDigest` and move it to package `org.opensearch.sql.opensearch.storage.scan.context` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:324` + + +can you add unit tests in `OpenSearchRelOptUtilTest`? 
+ + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java:None` + + +All of sort expression pushdown ITs are for this case. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:None` + + +It's redundant. I will remove it. + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java:None` + + +Sometimes I want the assertion on pushed down context as well, which only Calcite supports it. I'm not strong opinion of adding pushdown context on IT. But feel it's probably safer to add it in case some commit may break the pushdown optimization. + + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java:None` + + +nit: which one for example? +and can this predicate move to definition of ExpandCollationOnProjectExprRule.Config (can reduce node copy) + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java:None` + + +Seems duplicated with the below predication of `noAggregatePushed`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:332` + + +Is it possible that the pushed SORT(i.e. SORT_EXPR with all digests are simple expression) can satisfy sort collation here? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java:97` + + +We still push down sort although `scanProvidesRequiredCollation` is true? I think we could just remove the sort directly. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +Better use LinkedHashMap here or the final plan may be unstable. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:None` + + +[question] Will this method throw exception by any chance? We plan to make the request builder transformation process lazy. We should put any logic which may throw exception outside of the lambda function, so it can avoid failure on the final plan. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java:None` + + +What if script is null? Seems nothing happens. We better add assertion or throw exception if that's never possible. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +I think we should filter the simple expr. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:332` + + +It's possible. The cost computation has coarse results by multiplying field count with 1.1. So VolcanoPlanner will prefer regular field sort pushdown. + +Another option is to calculate the accurate complex expression count and multiply this count with 1.1. Put a check in the rule to not pushdown if all of expressions are simple expressions + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java:97` + + +I was thinking if first sort by [a, b] expressions is already in scan. The second sort by [a] expression could narrow down the sort effort. We could still allow the pushdown. + +For topK pushdown, it could be also applied. If sort by [a, b], limit 10 is pushed, say the second sort by [a], limit 2 comes in, we should allow it pushdown as well. This may need replace existing SORT_EXPR and LIMIT pushdown context. + +But right now, it has the problem that multiple different sort complex expressions doesn't work well yet. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java:97` + + +> The second sort by [a] expression could narrow down the sort effort. We could still allow the pushdown. + +Shall we do the same thing for the above branch then? It only push down limit when scanProvidesRequiredCollation is true. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:332` + + +I mean, in this query `... | sort a | eval b = a + 1 | sort b`, when pushing the `sort b`, this method will always return false. But I expect it return true. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java:None` + + +Use `noAggregatePushed` now + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:332` + + +Yes, it returns false in this case but doesn't matter. Now, if all of expressions are simple, it will go through regular pushDownSort. Field sort exists in regular traitset and is propagated well. It will be handled by `handleSimpleExpressionFieldSorts` method in ExpandCollationOnProjectExprRule. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java:97` + + +Revisit the logic, for already pushed down topK and if the scan collation satisfies the output collation, we can still leave the old sort in the scan(remove the new sort in the meantime). It's kind of keeping semantics. For regular sort pushdown, we can override by the new sort. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +Use LinkedHashMap now + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:None` + + +As discussed offline, move most of logic that could throw exception outside. 
And use a lazy pushdownSortSuppliers method instead. + + +## General Comments + + +### @songkant-aws + + +@LantaoJin It doesn't support sort expr pushdown for SortMergeJoin yet. + +SortMergeJoin is optimized by EnumerableMergeJoinRule, which is a physical plan level rule. It will inserts collation to left and right children of join and then sort is transformed to EnumerableSort. + +Our rules mostly optimize logical plans. Considering it's a valid case, I think one more enhancement is to add physical plan optimization support(Enumerable convention optimization) as well. I will create an issue to track this enhancement. + + + + +--- + +# PR #4749: Support script pushdown in sort-on-measure pushdown rewriting + +**URL:** https://github.com/opensearch-project/sql/pull/4749 + +**Author:** @LantaoJin + +**Created:** 2025-11-06T03:32:09Z + +**State:** MERGED + +**Merged:** 2025-11-06T08:18:35Z + +**Changes:** +452 -50 (27 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix the bugs: +1. Sort-on-measure fails to be pushed down if there is script +2. Add more ITs for sort-on-measure +3. Sort-on-measure for multi-terms misses bucketOrder rewriting. + +### Related Issues +Resolves #4738 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4748: Fix test failures due to version in mapping + +**URL:** https://github.com/opensearch-project/sql/pull/4748 + +**Author:** @ykmr1224 + +**Created:** 2025-11-05T23:55:31Z + +**State:** MERGED + +**Merged:** 2025-11-06T03:30:01Z + +**Changes:** +11 -11 (10 files) + +**Labels:** `maintenance` + +**Assignees:** @ykmr1224 + + +## Description + +### Description +- Fix test failures in main due to version in mapping +- Seems the check added in https://github.com/opensearch-project/OpenSearch/pull/19793 is causing error. +- Version number needs to have opensearch flag (version & 0x08000000) to be 1 (ref: https://github.com/opensearch-project/OpenSearch/blob/b1d1e33a3d251ef309800cb8ca03411599eccc81/libs/core/src/main/java/org/opensearch/Version.java#L209) +- `137217827` = 0x08000000 | 3000099 (means OpenSearch 3.0.0) + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4747: [Backport 2.19-dev] PPL tostring() implementation issue #4492 + +**URL:** https://github.com/opensearch-project/sql/pull/4747 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-05T19:02:53Z + +**State:** MERGED + +**Merged:** 2025-11-05T22:03:07Z + +**Changes:** +592 -0 (9 files) + + +## Description + +Backport 6783c897ca308051a686c52f321ab11a730a42fd from #4497. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4745: [Backport 2.19-dev] Fix filter push down producing redundant filter queries + +**URL:** https://github.com/opensearch-project/sql/pull/4745 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-05T10:17:55Z + +**State:** MERGED + +**Merged:** 2025-11-06T00:51:10Z + +**Changes:** +54 -12 (9 files) + + +## Description + +Backport da1f6c096dc6a4e46f1a7025d52f0e983f4af57b from #4744. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4744: Fix filter push down producing redundant filter queries + +**URL:** https://github.com/opensearch-project/sql/pull/4744 + +**Author:** @qianheng-aws + +**Created:** 2025-11-05T07:26:12Z + +**State:** MERGED + +**Merged:** 2025-11-05T10:17:39Z + +**Changes:** +54 -12 (9 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Fix filter push down producing redundant filter queries. + +This happens because, when pushing down another filter, OpenSearchRequestBuilder updates the current query if it exists and is a bool query. 
However, for Calcite, it's not suitable since it will hold many equivalent candidate plans in its planner process and will do lots of copy from the old plans. Updating the query builder will cause the change of request builder in the old plans. Many redundant filter queries will be added if this operation is triggered multiple times in the planning process. + +This PR fixes this issue by separating a `pushDownFilterForCalcite` individually which won't update the current query but creating a new one. + +### Related Issues +Resolves #4729 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +Non-blocking: seems we can add a thread-local to indicate what engine is used for current query, to retire all `forCalcite` API. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4743: Support aggregation/window commands with dynamic fields + +**URL:** https://github.com/opensearch-project/sql/pull/4743 + +**Author:** @ykmr1224 + +**Created:** 2025-11-05T00:11:25Z + +**State:** MERGED + +**Merged:** 2025-11-19T19:37:03Z + +**Changes:** +1484 -103 (16 files) + +**Labels:** `enhancement`, `PPL`, `calcite` + +**Assignees:** @ykmr1224 + + +## Description + +This PR is for feature branch `feature/permissive` + +### Description +- Support aggregation/window commands with dynamic fields + - stats, eventstats, timechart, trendline +- DebugUtils/JsonUtils are just utility class mainly for tests and debugging. 
+ +### Related Issues +Permissive mode RFC: https://github.com/opensearch-project/sql/issues/4349 +Dynamic fields RFC: https://github.com/opensearch-project/sql/issues/4433 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:51` + + +no pushdown? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:None` + + +nit, remove " + " + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:None` + + +what is type of salary? Type coercion does not works for avg? + + +### @penghuo on `common/src/main/java/org/opensearch/sql/common/utils/DebugUtils.java:None` + + +remove it? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsTimechartIT.java:None` + + +I am afraid this error message does not help user too much. +type coercion does not works for sma? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +why releated to dynamicFieldAccess? it just alias, right? + + +### @ykmr1224 on `common/src/main/java/org/opensearch/sql/common/utils/DebugUtils.java:None` + + +I will fix the comment and leave this class. It is not used in the code, but useful while testing and debugging. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +You are right. I will rename it. 
+ + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:51` + + +Currently testing without pushdown. Will test pushdown separately. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:None` + + +As the salary is dynamic field, it is typed as ANY. I think current type coercion doesn't work for ANY. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsTimechartIT.java:None` + + +I am thinking to work on the automatic type coercion from ANY to specific types separately. This is a workaround until then. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:None` + + +What if we use STRING? The current type coercion rules already support STRING → NUMERIC. +I believe dynamic field support will depend on type coercion regardless. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:None` + + +Do you mean typing dynamic fields as STRING? (Convert everything into String?) +That should work, but we need to convert back to STRING every time storing to dynamic fields. (since Map can take single value type) + + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsAggregationIT.java:None` + + +PR for type coercion: https://github.com/opensearch-project/sql/pull/4753 + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Can we cast to string for groupBy field? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +I realized timechart requires bigger change due to type assigned to `span` function, which prevents automatic type coercion work properly. +Let me address this in a separate PR. 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Found simpler way to solve the problem, and included the change in this PR. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java:None` + + +Could u explain it more on this, "// if first argument is string, consider it as timestamp"? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2054` + + +is it required for all visitor? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2054` + + +could u add a test in CalciteDynamicFieldsTimechartIT to help understand what is correspond logical plan / sql + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java:None` + + +Updated. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2054` + + +Added CalcitePPLDynamicFieldsTest.java‎ for spark SQL. Added explains in IT. + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDynamicFieldsTest.java:118` + + +The output always include _MAP columns? +@dai-chen does it works with unified ppl in spark? + + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDynamicFieldsTest.java:118` + + +Sure, let me check. + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDynamicFieldsTest.java:118` + + +It contains `_MAP` when the query does not explicitly select fields, since it should output all the dynamic fields along with static fields. (You can refer test case: `testProjectStaticFields`) + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsTimechartIT.java:38` + + +Can we only assert the part we're interested in? 
+ + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDynamicFieldsTest.java:118` + + +As I understand if we submit such SQL query on S3 table to Spark directly, the changes include at least: + +1. Add `_MAP` to Spark table schema +2. Add result expanding logic similarly as `DynamicFieldsResultProcessor.expandDynamicFields()` + +Do you have example for writing `_MAP`? I want to check if more changes required. + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDynamicFieldsTest.java:118` + + +@dai-chen +`_MAP` should be automatically added to the table schema when permissive mode is enabled, or a command generate dynamic fields (like `spath` command without `output` param) + +`_MAP` is collected [here](https://github.com/opensearch-project/sql/blob/feature/permissive/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java#L120) +Refer [this PR](https://github.com/opensearch-project/sql/pull/4611) for further context. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsTimechartIT.java:38` + + +I've added this per request from @penghuo to add explain verification, and I think it is better keeping whole part to detect when plan is changed. +I would migrate it to separate file once I merge the change and enabled permissive mode in `main` branch. (it is currently enabled only in integration test and cannot use same test base class) + + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDynamicFieldsTest.java:118` + + +@ykmr1224 I just want to make sure I’m understanding this correctly. + +- **Case 1:** For `_MAP` generated from a table, do we need to update the Spark catalog to add it when permissive mode is enabled? When you say "automatically added to the table", it means current OpenSearch schema right? 
+- **Case 2:** For `_MAP` generated dynamically by a command like `spath`, could you share a concrete example, including: + - the PPL query, and + - the Spark SQL query generated? + +Since our approach is to transpile PPL into Spark SQL, I’d like to ensure that all required semantics are encoded in the SQL we generate. Otherwise, we’ll need to estimate the effort for any changes required in the Spark SQL engine. + + +## General Comments + + +### @ykmr1224 + + +Updated to utilize type coercion. + + +--- + +# PR #4742: Fix binning udf resolution / Add type coercion support for binning UDFs + +**URL:** https://github.com/opensearch-project/sql/pull/4742 + +**Author:** @ahkcs + +**Created:** 2025-11-04T23:50:50Z + +**State:** MERGED + +**Merged:** 2025-11-11T22:24:18Z + +**Changes:** +500 -288 (22 files) + +**Labels:** `bug`, `PPL`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description + Fixes #4740 - Binning UDFs are now resolved through `PPLFuncImpTable.resolve()` instead of being instantiated directly via `makeCall()`. + +Implemented automatic type coercion to allow string fields with numeric values to work seamlessly with all binning operations (bins, span, minspan, range). + + + +### Related Issues +* Resolves #4740 + +Type coercion support for binning UDFs #4356 + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the fix! + + +## Review Comments + + +### @dai-chen on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml:39` + + +is this required for each test? + + +### @ahkcs on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml:39` + + +I think `yamlRestTest` is used to verify issue fixed + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java:138` + + +np: in future we may need more readable function signature/checker spec or docs for internal function usage too. 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java:138` + + +Added comments + + +### @penghuo on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml:None` + + +verify actually results. + + +### @ahkcs on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml:None` + + +Updated expected results + + +### @penghuo on `docs/user/ppl/cmd/bin.rst:None` + + +ditto + + +### @penghuo on `docs/user/ppl/cmd/bin.rst:None` + + +I suggest remove this section. + +Type conversion is a generic feature applied across all commands and functions, so it does not need to be explicitly mentioned in each one. + +Additionally, enabling binning operations should target string-type fields, not keyword or text fields specifically. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java:None` + + +It should return a null value. For example, if only one document contains an invalid field, the entire query should not fail. + + +### @ahkcs on `docs/user/ppl/cmd/bin.rst:None` + + +Updated to remove comments + + +### @ahkcs on `docs/user/ppl/cmd/bin.rst:None` + + +Updated to remove this section + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java:None` + + +I think it makes sense, I have updated to remove the validation logic. I kept the validation logic in `BinnableField` to only verify if it's supported type(Reject truly unsupported types (e.g., BOOLEAN, ARRAY, MAP)) + + +## General Comments + + +### @dai-chen + + +Adding `bug` label since enforce-label workflow doesn't recognize `bugFix` label. + + +### @ahkcs + + +Unit tests in CI seems flaky + + +### @dai-chen + + +@ahkcs CI is failing. I retried but still failed. Could you double check? + + +### @ahkcs + + +> @ahkcs CI is failing. I retried but still failed. Could you double check? 
+ +Unit tests passed after rebase + + +--- + +# PR #4741: [Backport 2.19-dev] Support wildcard for replace command (#4698) + +**URL:** https://github.com/opensearch-project/sql/pull/4741 + +**Author:** @ahkcs + +**Created:** 2025-11-04T23:23:29Z + +**State:** MERGED + +**Merged:** 2025-11-05T02:29:41Z + +**Changes:** +733 -25 (9 files) + + +## Description + +(cherry picked from commit f97adb00227d0b209becc4cea72501af221533e6) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4736: [Backport 2.19-dev] Add `allowed_warnings` in yaml restful tests + +**URL:** https://github.com/opensearch-project/sql/pull/4736 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-04T09:37:31Z + +**State:** MERGED + +**Merged:** 2025-11-05T02:32:36Z + +**Changes:** +125 -1 (25 files) + + +## Description + +Backport a24e794e2196f680f67fee6201650ce443a25a65 from #4731. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4735: [Backport 2.19-dev] Update clickbench queries with parameter bucket_nullable=false + +**URL:** https://github.com/opensearch-project/sql/pull/4735 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-04T09:36:47Z + +**State:** MERGED + +**Merged:** 2025-11-05T02:34:46Z + +**Changes:** +1983 -56 (116 files) + + +## Description + +Backport 7420c6eeda6099fd81eef93df3cd6fdf6bcf37c7 from #4732. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4732: Update clickbench queries with parameter bucket_nullable=false + +**URL:** https://github.com/opensearch-project/sql/pull/4732 + +**Author:** @qianheng-aws + +**Created:** 2025-11-04T08:00:41Z + +**State:** MERGED + +**Merged:** 2025-11-04T09:36:31Z + +**Changes:** +1983 -56 (116 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Update clickbench queries with parameter bucket_nullable=false, also add checking on plan for v3. + +q40 and q43 don't add the parameter because they are blocked by #4729 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4731: Add `allowed_warnings` in yaml restful tests + +**URL:** https://github.com/opensearch-project/sql/pull/4731 + +**Author:** @LantaoJin + +**Created:** 2025-11-04T07:57:13Z + +**State:** MERGED + +**Merged:** 2025-11-04T09:37:16Z + +**Changes:** +125 -1 (25 files) + +**Labels:** `testing`, `backport 2.19-dev` + + +## Description + +### Description +Even though we've merged #4569, we should still add `allowed_warnings` in all yaml restful tests, because PRs always need to be backported to the 2.19-dev branch, where `_shard_doc` cannot be used. 
+ +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4730: [Backport 2.19-dev] Support Streamstats command with calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4730 + +**Author:** @ishaoxy + +**Created:** 2025-11-04T06:45:05Z + +**State:** MERGED + +**Merged:** 2025-11-04T15:15:53Z + +**Changes:** +2518 -23 (34 files) + + +## Description + +### Description +backport #4297 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4728: [Backport 2.19-dev] Enhance tests and doc for eval isnull/isnotnull functions + +**URL:** https://github.com/opensearch-project/sql/pull/4728 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-04T05:16:47Z + +**State:** MERGED + +**Merged:** 2025-11-05T02:34:34Z + +**Changes:** +131 -30 (2 files) + + +## Description + +Backport c9a8f47ea993c95bb6b13023de68ab54ddedba00 from #4724. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4727: [Backport 2.19-dev] Support access to nested field of struct after fields command + +**URL:** https://github.com/opensearch-project/sql/pull/4727 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-11-04T02:58:53Z + +**State:** MERGED + +**Merged:** 2025-11-04T09:09:27Z + +**Changes:** +61 -3 (2 files) + + +## Description + +Backport e06ebc5d04f0c55d9b4026da3accd9cc266f190c from #4719. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +cc @qianheng-aws + + +--- + +# PR #4725: [Backport 2.19-dev] Support serializing external OpenSearch UDFs at pushdown time (#4618) + +**URL:** https://github.com/opensearch-project/sql/pull/4725 + +**Author:** @yuancu + +**Created:** 2025-11-04T02:12:13Z + +**State:** MERGED + +**Merged:** 2025-11-04T05:45:08Z + +**Changes:** +143 -18 (9 files) + + +## Description + +### Description + +Backport #4618 to 2.19-dev + +### Commit Message + +* Supports serilizing external OpenSearch UDFs + + + +* Correct subfield access logical when calling ITEM + + + +* Resolve types of generated structs based on their values because their types are UNDEFINED + + + +* Add explain and integration tests for geoip + + + +--------- + + +(cherry picked from commit a003e8c7399b62ebc3a57c07bb98f11d843ceb82) + +### Related Issues +Resolves #4478 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4724: Enhance tests and doc for eval isnull/isnotnull functions + +**URL:** https://github.com/opensearch-project/sql/pull/4724 + +**Author:** @ahkcs + +**Created:** 2025-11-03T23:14:26Z + +**State:** MERGED + +**Merged:** 2025-11-04T05:16:26Z + +**Changes:** +131 -30 (2 files) + +**Labels:** `documentation`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Enhance tests and doc for eval isnull/isnotnull functions + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4723: [Backport 2.19-dev] Publish internal modules separately for downstream reuse + +**URL:** https://github.com/opensearch-project/sql/pull/4723 + +**Author:** @dai-chen + +**Created:** 2025-11-03T21:17:14Z + +**State:** MERGED + +**Merged:** 2025-11-04T19:51:02Z + +**Changes:** +703 -0 (8 files) + +**Labels:** `enhancement` + +**Assignees:** @dai-chen + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4484 commit 05d6594c5163378cae45d37d03984439d5edd98e and add `2.19-dev` to publish workflow) + +**Note**: This PR enables publishing the internal modules from this `2.19-dev` branch as version 2.19.0.0 to support the PPL unification in Spark development (Spark 3.5 required JDK 11 / 17). 
The `2.19` branch currently has no publish task for these modules, so this publish won’t conflict with or overwrite any existing artifacts. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4721: [Backport 2.19-dev] Fix sub-fields accessing of generated structs (#4683) + +**URL:** https://github.com/opensearch-project/sql/pull/4721 + +**Author:** @yuancu + +**Created:** 2025-11-03T10:27:37Z + +**State:** MERGED + +**Merged:** 2025-11-04T08:00:59Z + +**Changes:** +49 -2 (4 files) + + +## Description + +* Correct subfield access logical when calling ITEM + + + +* Add explain and integration tests + + + +--------- + + +(cherry picked from commit 3a42c511cd08b2639f3c95441a9b7cac2f8ddb2c) + +### Description +Backport #4683 to 2.19-dev + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4720: [Backport 2.19-dev] Bump Calcite to 1.41.0 (#4714) + +**URL:** https://github.com/opensearch-project/sql/pull/4720 + +**Author:** @LantaoJin + +**Created:** 2025-11-03T10:07:31Z + +**State:** MERGED + +**Merged:** 2025-11-03T12:58:31Z + +**Changes:** +231 -281 (51 files) + + +## Description + +(cherry picked from #4714 commit 23dc63814ca903ad1f26f26af3cd0804ecb56078) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4719: Support access to nested field of struct after fields command + +**URL:** https://github.com/opensearch-project/sql/pull/4719 + +**Author:** @qianheng-aws + +**Created:** 2025-11-03T10:02:30Z + +**State:** MERGED + +**Merged:** 2025-11-04T02:58:40Z + +**Changes:** +61 -3 (2 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Support access to nested field of struct after fields command + +### Related Issues +Resolves #3459 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:181` + + +Non-blocking: Do you have any idea how to specify precise type here? Calcite 1.41.0 has more strict type checking in RexToLixTranslator. 
If the Map value is another Array type, it cannot cast a ANY to Array. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:181` + + +Maybe we can do type-conversion recursively for the nested fields and store them in a customized type? It will has benefit to resolve proper functions for these fields. + +What's more I'm thinking we may be able to deprecate the flatten fields if these nested fields' type are precise enough. It's not worthy to do that now since we have flatten fields and it only takes effect after `FIELD` command. It should be done as another issue for enhancement. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:181` + + +In the beginning, I'm thinking using `RelRecordType ` instead of `MapSqlType` to address this issue, which will get better performance and naturally support variant types. + +But looks like using map to support dynamic fields and type tolerance will be our trend. 
https://github.com/opensearch-project/sql/issues/4349 and https://github.com/opensearch-project/sql/issues/4433 + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4718: [Backport 2.19-dev] Fix disk no space issue in bwc-test (#4716) + +**URL:** https://github.com/opensearch-project/sql/pull/4718 + +**Author:** @LantaoJin + +**Created:** 2025-11-03T08:40:55Z + +**State:** MERGED + +**Merged:** 2025-11-03T09:16:24Z + +**Changes:** +216 -108 (4 files) + + +## Description + +(cherry picked from #4716 commit a9e5d391a41cf12ed7d1c17d2a29d37f49df7a68) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4717: Support using decimal as span literals + +**URL:** https://github.com/opensearch-project/sql/pull/4717 + +**Author:** @yuancu + +**Created:** 2025-11-03T07:01:18Z + +**State:** MERGED + +**Merged:** 2025-11-13T06:55:44Z + +**Changes:** +103 -26 (8 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description + +This PR restores support for decimal in span literals for the bin command. + +E.g. It makes the following queries possible +- span used in timechart, bin, etc. E.g. `source=events_null | bin cpu_usage span=7.5 | stats count() by cpu_usage` +- decimal for span used in stats. E.g. `| stats span(value, 9.5)` + +Please note that the following items are not in scope as they were not supported: +- decimal + time unit. E.g. `| timechart span=0.95d` + +### Related Issues +Resolves #4631 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java:1073` + + +is `span(@timestamp, 0.5d)` supported with this PR? please add an IT for that + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java:1073` + + +>Please note that the following items are not in scope as they were not supported: +> +> - decimal + time unit. E.g. | timechart span=0.95d +> - decimal for span used in stats. E.g. | stats span(value, 9.5) + +What exception will be thrown in above PPLs, please add IT or UT for them. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java:1073` + + +- Added a unit test for the error in the first use case + +- Restore support to the second case: + > - decimal for span used in stats. E.g. | stats span(value, 9.5) + + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:None` + + +"[y] is not a valid term" is a confusing error message since user can use `span(@timestamp, 1y)` + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:None` + + +Fixed. 
The error message becomes `Span length [2.5y] is invalid: floating-point time intervals are not supported.` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4716: Fix disk no space issue in bwc-test + +**URL:** https://github.com/opensearch-project/sql/pull/4716 + +**Author:** @LantaoJin + +**Created:** 2025-11-03T05:39:42Z + +**State:** MERGED + +**Merged:** 2025-11-03T08:29:43Z + +**Changes:** +201 -93 (4 files) + +**Labels:** `infrastructure`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +The current CI workflow of `bwc-tests` started 2 clusters with 3 nodes, which easily fails with `no space left`: +(now we enabled the build cache in https://github.com/opensearch-project/sql/pull/4646) +``` +Execution failed for task ':integ-test:sqlBwcCluster#rollingUpgradeClusterTask'. +> Failed to copy /home/ci-runner/.gradle/caches/8.14/transforms/1e311199a08f55fdffb433b061421f92/transformed/opensearch-3.4.0-SNAPSHOT-linux-x64.tar.gz/opensearch-3.4.0-SNAPSHOT/lib/lucene-misc-10.3.1.jar to /__w/sql/sql/integ-test/build/testclusters/sqlBwcCluster0-2/distro/3.4.0-ARCHIVE/lib/lucene-misc-10.3.1.jar +> java.io.IOException: No space left on device +``` + +This PR separates the workflow of `bwc-tests` into two workflows: `bwc-tests-rolling-upgrade` and `bwc-tests-full-restart`. Each workflow only creates one cluster with 3 nodes. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4715: [Backport 2.19-dev] Pushdown the `top` `rare` commands to nested aggregation (#4707) + +**URL:** https://github.com/opensearch-project/sql/pull/4715 + +**Author:** @LantaoJin + +**Created:** 2025-11-03T03:35:51Z + +**State:** MERGED + +**Merged:** 2025-11-03T04:15:22Z + +**Changes:** +605 -290 (47 files) + + +## Description + +(cherry picked from #4707 commit 234f6083a0aefb98626c1443fc0ea80ace9ad290) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4714: Bump Calcite to 1.41.0 + +**URL:** https://github.com/opensearch-project/sql/pull/4714 + +**Author:** @LantaoJin + +**Created:** 2025-11-02T11:39:52Z + +**State:** MERGED + +**Merged:** 2025-11-03T09:12:48Z + +**Changes:** +233 -283 (51 files) + +**Labels:** `dependencies`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Bump Calcite to 1.41.0 which is released on Nov 1, 2025 + +### Related Issues +Resolves #4143 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2695` + + +@songkant-aws can you have a review on this part? 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4713: Enhance doc and error message handling for `bins` on time-related fields + +**URL:** https://github.com/opensearch-project/sql/pull/4713 + +**Author:** @ahkcs + +**Created:** 2025-10-31T22:07:51Z + +**State:** MERGED + +**Merged:** 2025-12-10T20:22:11Z + +**Changes:** +54 -2 (5 files) + +**Labels:** `documentation`, `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + + +--- + +**Summary** + +This PR adds clear error handling and documentation for the known limitation that the bins parameter on timestamp fields requires pushdown to be enabled. + + + + +### Related Issues +* Resolves #https://github.com/opensearch-project/sql/issues/4578 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @noCharger on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java:None` + + +Nit: Describe the default behavior and the configuration that controls pushdown. + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java:None` + + +Although we enabled push down, we still don't support query like `source=events_null | bin @timestamp bins=3`. + +Shall we call out that it's also required to be used as the aggregation bucket field? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java:None` + + +Updated comments and documentation + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java:None` + + +Updated comments and documentation + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +I think we should change error message as well. 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +Updated error message + + +## General Comments + + +### @ahkcs + + +unit tests in CI seems flaky + + +--- + +# PR #4712: [Backport 2.19-dev] bin command error message enhancement (#4690) + +**URL:** https://github.com/opensearch-project/sql/pull/4712 + +**Author:** @ahkcs + +**Created:** 2025-10-31T16:34:50Z + +**State:** MERGED + +**Merged:** 2025-11-01T01:37:05Z + +**Changes:** +342 -33 (10 files) + + +## Description + +(cherry picked from commit d1ffabd500e11730b982d53a29342e8953026724) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4711: [Backport 2.19-dev][Enhancement]Merge group fields for aggregate if having dependent group fields #4703 + +**URL:** https://github.com/opensearch-project/sql/pull/4711 + +**Author:** @qianheng-aws + +**Created:** 2025-10-31T08:26:51Z + +**State:** MERGED + +**Merged:** 2025-10-31T09:38:47Z + +**Changes:** +454 -72 (13 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4703 commit https://github.com/opensearch-project/sql/commit/5517c1ea7cd11df8fffa1642c77b3dccc51c5e1e) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4710: [Backport 2.19-dev] Do not remove nested fields in resolving AllFieldsExcludeMeta (#4708) + +**URL:** https://github.com/opensearch-project/sql/pull/4710 + +**Author:** @LantaoJin + +**Created:** 2025-10-31T08:21:37Z + +**State:** MERGED + +**Merged:** 2025-10-31T08:59:56Z + +**Changes:** +86 -2 (4 files) + + +## Description + +(cherry picked from #4708 commit 9953a5ac677e0e15bf9930e125e0964928a83dac) + + + +## Reviews + + +_No human reviews with 
comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4709: [Backport 2.19-dev] BucketAggretationParser should handle more non-composite bucket types (#4706) + +**URL:** https://github.com/opensearch-project/sql/pull/4709 + +**Author:** @LantaoJin + +**Created:** 2025-10-31T07:34:42Z + +**State:** MERGED + +**Merged:** 2025-10-31T08:20:16Z + +**Changes:** +162 -15 (5 files) + + +## Description + +(cherry picked from #4706 commit 373b394c9fdccb5d3d487f07b027d9e7ee97ddc6) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4708: Do not remove nested fields in resolving AllFieldsExcludeMeta + +**URL:** https://github.com/opensearch-project/sql/pull/4708 + +**Author:** @LantaoJin + +**Created:** 2025-10-31T05:23:03Z + +**State:** MERGED + +**Merged:** 2025-10-31T08:05:53Z + +**Changes:** +86 -2 (4 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix the issue that struct/nested field cannot be accessed after join (or other bi-operator): +``` +{ + "mappings": { + "properties": { + "time": { "type": "date" }, + "a": { + "properties": { + "b": { + "properties": { + "c": { "type": "keyword" } + } + } + } + }, + "d": { + "properties": { + "e": { + "properties": { + "f": { "type": "keyword" } + } + } + } + } + } + } +} +``` +For query +``` +source=test | join type=inner left=l, right=r ON l.time = r.time test +| fields a.b.c +``` +its AST tree is +``` +plan=Project(projectList=[AllFields()] + child=[Join( + left=SubqueryAlias(alias=l, + child=[Project(projectList=[AllFieldsExcludeMeta()] + child=[Relation(tableNames=[test])])])]), + right=SubqueryAlias(alias=r, + child=[Project(projectList=[AllFieldsExcludeMeta()] + 
child=[Relation(tableNames=[test])])])])]) +``` + +Current `visitProject()` **mistakenly removes nested fields** in handling `AllFieldsExcludeMeta`, which cause no `a.b.c` fields in join's children. So `a.b.c` cannot be accessed after join. + +### Related Issues +Resolves #4575 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java:68` + + +This code isn't for fixing the current issue, just a small enhancement for which name parts great than 1. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4707: Pushdown the `top` `rare` commands to nested aggregation + +**URL:** https://github.com/opensearch-project/sql/pull/4707 + +**Author:** @LantaoJin + +**Created:** 2025-10-30T15:30:22Z + +**State:** MERGED + +**Merged:** 2025-11-03T03:06:59Z + +**Changes:** +552 -269 (45 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Pushdown `top 10 usenull=true bytes by method` to nested terms aggregation: +``` +{ + "size": 0, + "aggs": { + "topBy": { + "terms": { + "field": "method" + }, + "aggs": { + "topField": { + "terms": { + "field": "bytes", + "size": 10, + "order": { + "_count": "desc" + } + } + } + } + } + } +} +``` +And pushdown `rare 10 usenull=true bytes by method` to: +``` +{ + "size": 0, + "aggs": { + "topBy": { + "terms": { + "field": "method" + }, + "aggs": { + "topField": { + "terms": { + "field": "bytes", + "size": 10, + "order": { + "_count": "asc" + } + } + } + } + } + } +} +``` + 
+Additional, rename `metrics` to `measure`, rename rule file names. + +### Related Issues +Resolves #4671 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml:12` + + +[non-blocking] It's advised to use rare_terms aggregation in place of terms aggregation with ascending count order: https://docs.opensearch.org/latest/aggregations/bucket/rare-terms/ + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Unintentional insertion of empty comments? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml:12` + + +Replace rare in an followup PR? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +deleted. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml:12` + + +Is it correct to push down limit into the size of the first level term agg? I guess it will produce 10000 * 2 rows in the end if there is enough buckets. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +Is it accuracy to set the row count to rare/top's number directly? Shouldn't be rare/top's number * estimated buckets' size? 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +ditto + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml:12` + + +You guessed right. In general, neither 65535 nor 10000 is precise if the first group has 100,000 keys. + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml:12` + + +It's not proper to fix it in this PR. @qianheng-aws Can you submit another fix for limit pushdown on nested aggregate: Pushdown to first tier + keep EnumerableLimit. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +fixed + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +fixed + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4706: BucketAggretationParser should handle more non-composite bucket types + +**URL:** https://github.com/opensearch-project/sql/pull/4706 + +**Author:** @LantaoJin + +**Created:** 2025-10-30T10:19:17Z + +**State:** MERGED + +**Merged:** 2025-10-31T05:42:45Z + +**Changes:** +164 -14 (5 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +BucketAggretationParser should handle more non-composite bucket types + +### Related Issues +Resolves #4705 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:138` + + +This does not necessarily apply to all the rest types of bucket. E.g. I doubt that it won't work for `MultiTermsAggregation.Bucket`. But since there is no such aggregation in place yet, we can modify until then. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:138` + + +Good finding, actually we already support multi-terms. Fixed. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:29` + + +Can you help update this javadoc as well? + +Something like + +```suggestion +/** + * Use BucketAggregationParser for {@link MultiBucketsAggregation}, where it returns multiple buckets. + */ +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:29` + + +done + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4703: [Enhancement]Merge group fields for aggregate if having dependent group fields + +**URL:** https://github.com/opensearch-project/sql/pull/4703 + +**Author:** @qianheng-aws + +**Created:** 2025-10-30T07:08:25Z + +**State:** MERGED + +**Merged:** 2025-10-31T06:45:55Z + +**Changes:** +454 -72 (13 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev`, `clickbench` + + +## Description + +### Description +Merge group fields for aggregate if having dependent group fields. e.g. it will transform +``` +... | eval a1 = a - 1, a2 = f(a), a3 = 1 | stats count() by a, a1, a2, a3 +``` +to +``` +...| stats count() by a | eval a1 = a - 1, a2 = f(a), a3 = 1 +``` +which will both simplify aggregation complexity and also avoid agg-script push down. 
+ +Currently, we only support single base group fields and all other group fields must have dependency on the base one. + +### Related Issues +Resolves #4681 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:62` + + +I'm curious when will there be more than 2 rels. With the given rule config, I assume it should match only the given pattern. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:62` + + +Agree. Just defensive check as other rules. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4702: [Backport 2.19-dev] Support 'usenull' option in PPL `top` and `rare` commands (#4696) + +**URL:** https://github.com/opensearch-project/sql/pull/4702 + +**Author:** @LantaoJin + +**Created:** 2025-10-30T03:40:33Z + +**State:** MERGED + +**Merged:** 2025-10-30T16:26:26Z + +**Changes:** +769 -146 (28 files) + + +## Description + +(cherry picked from #4696 commit c6a5fb9762aed964fe56513b80a55f35934b0255) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4701: [Backport 2.19-dev][Maintenance] Fix CVE-2025-48924 + +**URL:** https://github.com/opensearch-project/sql/pull/4701 + +**Author:** @RyanL1997 + +**Created:** 2025-10-30T02:30:41Z + +**State:** MERGED + +**Merged:** 2025-10-30T03:30:42Z + +**Changes:** +3 -0 (1 files) + + +## Description + +### Description +Manual 
backport + +### Related Issues +* Backport #4665 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4700: [Backport 2.19-dev] Support millisecond span (#4672) + +**URL:** https://github.com/opensearch-project/sql/pull/4700 + +**Author:** @yuancu + +**Created:** 2025-10-30T02:22:16Z + +**State:** MERGED + +**Merged:** 2025-10-30T05:45:24Z + +**Changes:** +181 -47 (13 files) + + +## Description + +### Description +Backport #4672 to 2.19-dev + + +### Commit Message + +* Support millisecond span + + + +* Update per funciton tests + + + +--------- + + +(cherry picked from commit b224750d9bb865aa6695055c6ec4246485f597c2) + + + +### Related Issues +Resolves #4550 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4699: [AUTO] Add release notes for 2.19.4 + +**URL:** https://github.com/opensearch-project/sql/pull/4699 + +**Author:** @opensearch-ci-bot + +**Created:** 2025-10-29T22:08:58Z + +**State:** MERGED + +**Merged:** 2025-10-30T17:39:28Z + +**Changes:** +6 -0 (1 files) + + +## Description + +Add release notes for 2.19.4 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +It seems like the bot was picking the wrong `HEAD`? + + + +### @Swiddis + + +Cc. @gaiksaya + + +### @gaiksaya + + +> It seems like the bot was picking the wrong `HEAD`? + +Fixed the base branch + + +--- + +# PR #4698: Support wildcard for replace command + +**URL:** https://github.com/opensearch-project/sql/pull/4698 + +**Author:** @ahkcs + +**Created:** 2025-10-29T20:59:35Z + +**State:** MERGED + +**Merged:** 2025-11-04T19:00:20Z + +**Changes:** +722 -14 (9 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Support wildcard for replace command + +### Related Issues +* Resolves #3975 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +issue: No way to escape asterisks + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +question: What are the perf implications of reprocessing the pattern from scratch on every record? + +Ideally it'd be faster to compile the pattern once and reuse it to reduce branching in the inner loop. But I don't know if it's a measurable difference. + + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +suggestion (if-minor): Compiling an equivalent Regex matcher might be faster and simpler. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +That's a great catch! For now I am marking it as a limitation in the docs since the use case for literal asterisks is rare. Do you think we should implement escape instead or we can keep it as a limitation? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +That's a great suggestion! Refactored to use regex matching + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +That's a good catch! Added LRU pattern cache to compile once per unique pattern string instead of compiling on every row + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +What happens if pattern contains regex? I think we need to escape regex. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/udf/WildcardReplaceFunctionImpl.java:None` + + +If we use regex to execute the replace, should we rather utilize existing function for regex replacement? 
In that case, we just need to compile pattern during planning, and don't need caching during execution. + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +We can keep it as a limitation for now, but I'm a bit concerned about breaking a potential existing usage with no workaround. I don't know if who's using asterisks today, the number is probably low, but it'd be surprise me if it was 0. + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +He's using `Pattern.quote` to escape the literal parts on line 80. + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +Especially if we're already building the final pattern with regex split (L76), we might be able to just make the split have negative lookahead with `Pattern.split("(?!\\\\)\\*")` + +Feel free to skip this if it's more complicated than that + + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java:None` + + +Updated to support escaping asterisks + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/udf/WildcardReplaceFunctionImpl.java:None` + + +I think that is a great improvement! I have updated, now wildcard patterns convert to regex at planning time. Also updated to use Clacite's REGEXP_REPLACE_3 operator instead of custom UDF, removed WildcardReplaceFunctionImpl, WildcardReplaceUtils, and runtime caching + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java:105` + + +Let's add comprehensive unit tests for the methods in WildcardUtils. + + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java:None` + + +Should it be private? 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java:None` + + +ditto + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java:105` + + +Added unit tests in WildcardUtilsTest + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java:None` + + +Changed to private + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java:None` + + +Changed to private + + +## General Comments + + +### @LantaoJin + + +The bwc-tests failed due to no space left on device which can be fixed by #4716 , no related to this PR but better to merge upstream (after 4716 merged) then rerun. + + +--- + +# PR #4697: [Backport 2.19-dev] Update search.rst documentation + +**URL:** https://github.com/opensearch-project/sql/pull/4697 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-29T18:46:19Z + +**State:** MERGED + +**Merged:** 2025-10-29T19:10:47Z + +**Changes:** +1 -1 (1 files) + + +## Description + +Backport e5fb9b52d53354075493350e24b9be3727ba1aea from #4686. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4696: Support 'usenull' option in PPL `top` and `rare` commands + +**URL:** https://github.com/opensearch-project/sql/pull/4696 + +**Author:** @LantaoJin + +**Created:** 2025-10-29T09:42:41Z + +**State:** MERGED + +**Merged:** 2025-10-30T02:48:08Z + +**Changes:** +767 -146 (28 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +* usenull=\: optional (since 3.4.0). whether to output the null value. 
The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``: + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` + +See #4684 for examples. + +### Related Issues +Resolves #4684 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/rare.rst:32` + + +[bullet points in blockquote](https://github.com/LantaoJin/search-plugins-sql/blob/291e3fc183871fe4879c195586b3431f844dbe32/docs/user/ppl/cmd/rare.rst), is it expected? + + +### @LantaoJin on `docs/user/ppl/cmd/rare.rst:32` + + +Yes. Keep the format same with the [current stats.rst](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/stats.rst#syntax). I tried to use the new format to align with [Update PPL Command Documentation](https://github.com/opensearch-project/sql/pull/4562). But seems there are still many format problems in that PR. [ref](https://github.com/opensearch-project/sql/pull/4562#discussion_r2476160870) + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1879` + + +Why not apply `isNotNull` directly on `groupByList`, but on their underlying input ref? If some operation converts a `null` field to a non-null one, I think it should not be filtered out. 
+ + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1879` + + +@yuancu +Think about query: +``` +| eval a = nullif(status, "200") | stats bucket_nullable = false count() by a +``` +The `groupByList` contains `RexCall "AS($9, 'a')"`. So we finally build a +`context.relBuilder.filter(isNotNull($9))` which $9 is `nullif(status, "200")` instead of `status` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4695: Adding IT suite for PPL-based dashboards in Neo for CloudWatch Lake + +**URL:** https://github.com/opensearch-project/sql/pull/4695 + +**Author:** @aalva500-prog + +**Created:** 2025-10-29T06:08:26Z + +**State:** MERGED + +**Merged:** 2025-11-10T19:36:55Z + +**Changes:** +4167 -0 (17 files) + +**Labels:** `PPL`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +This PR adds integration tests and documentation for PPL based dashboards covering NFW, CloudTrail, WAF, and VPC logs in Neo for CloudWatch Lake. + +### Changes + +**New Integration Tests:** +* Added NFW (Network Firewall) PPL dashboard integration tests +* Added CloudTrail PPL dashboard integration tests +* Added VPC Flow Logs PPL dashboard integration tests +* Added WAF PPL dashboard integration tests +* Reorganized PPL dashboard tests into dedicated dashboard/ package + +**Test Infrastructure:** +* Added new index mappings and test data for NFW, CloudTrail, VPC, and WAF logs + +**Documentation:** +* Added documentation for NFW, CloudTrail, VPC, and WAF PPL based dashboards integration tests + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `docs/dashboard/CLOUDTRAIL_PPL_INTEGRATION_TESTS.md:1` + + +nit: why are all these file names capitalized? + + +### @Swiddis on `docs/dashboard/CLOUDTRAIL_PPL_INTEGRATION_TESTS.md:None` + + +suggestion: These docs should all be bundled besides the tests, not kept in separate files. + +It's easier to maintain the IT suite if all the test information is self-contained in the same location as the tests themselves. Future readers might find the tests first and not know these files exist. + + +### @Swiddis on `docs/dashboard/NFW_PPL_INTEGRATION_TESTS.md:None` + + +praise: I love that these breakdowns include the purpose of each query. + +Similar to the above comment, why are we including the expectations as part of docs that are separate from the tests? This is going to get out of sync fast if data changes. + +If we want this documented in human readable form, we should put it in doctest. This could be a good starting point to add a guide in reference to #4145 + + +### @Swiddis on `integ-test/src/test/resources/vpc_logs.json:1` + + +question: Where exactly is this data from? + +We had the issue with integrations that we committed a bunch of sample data and lost the source it was generated from. That _would_ be a good thing to put in the docs. + + +### @RyanL1997 on `integ-test/src/test/resources/vpc_logs.json:1` + + ++1 We need to get some data from the actual real flow, like from the actual loggroup of VPC logs + + +### @aalva500-prog on `docs/dashboard/CLOUDTRAIL_PPL_INTEGRATION_TESTS.md:1` + + +I'll change that, thanks! + + +### @aalva500-prog on `docs/dashboard/NFW_PPL_INTEGRATION_TESTS.md:None` + + +You are right, we should remove the expectations from here and put it in doctest. I'll update it, thanks! + + +### @RyanL1997 on `integ-test/src/test/resources/vpc_logs.json:1` + + +Let me actually follow up on this. 
I do have some existing flow that I can export some sanitized data. Cuz even for the verification of query correctness, we may need some field with relatively high cardinality. + + +### @aalva500-prog on `integ-test/src/test/resources/vpc_logs.json:1` + + +The data you see here is coming from my own S3 bucket, which I use during the integration creation workflow in OpenSearch Dashboards. However, this is not real data and it only has a few records, as the purpose of this exercise was to test query correctness specifically. I had to change the data to avoid exposing sensitive info also. + + +### @aalva500-prog on `integ-test/src/test/resources/vpc_logs.json:1` + + +During the new workflow in PPL based dashboards the data should come directly from CW log groups, though. Apart from the NFW, the rest of the queries have been already tested by DQS team connecting directly with CW log groups. However, for NFW the data has been retrieved directly from own CW log groups, so the schema is correct, but I can add some more data if needed. + + +### @aalva500-prog on `integ-test/src/test/resources/vpc_logs.json:1` + + +That would be good, thanks @RyanL1997 + +> Let me actually follow up on this. I do have some existing flow that I can export some sanitized data. Cuz even for the verification of query correctness, we may need some field with relatively high cardinality. + + + + +### @aalva500-prog on `docs/dashboard/CLOUDTRAIL_PPL_INTEGRATION_TESTS.md:None` + + +Sure, I'll put all the files in the same location, thanks! + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst:7` + + +As I understand, the IT introduced in this PR is like sanity test in https://github.com/opensearch-project/opensearch-spark/pull/995. But could you clarify what's the purpose of this doc? What is the audience of this doc? 
+ + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst:7` + + +Hi @dai-chen, + +Thank you for your feedback. You are absolutely correct that this PR serves as sanity testing to ensure PPL-based dashboard queries remain functional as the SQL plugin evolves. + +However, the primary objective of this documentation is to provide real-world examples, as outlined in issue (https://github.com/opensearch-project/sql/issues/4145). Our current PPL commands documentation exclusively uses the `accounts` index, which represents a simplified dataset not typically found in production environments. + +In contrast, the examples included in this PR contain data patterns that AWS customers commonly encounter in their actual use cases. This approach provides more practical and relevant examples for our documentation. + +I hope this clarifies the rationale behind this contribution. + + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst:7` + + +I see. So this is essentially to showcase how to use PPL to do analytics on real-world data (AWS logs). I feel our RST is not the best place for this. Please follow up with @ritvibhatt for this. + + +## General Comments + + +### @RyanL1997 + + +LGTM + + +--- + +# PR #4693: Bump commons-lang3 version to resolve CVE-2025-48924 + +**URL:** https://github.com/opensearch-project/sql/pull/4693 + +**Author:** @gaiksaya + +**Created:** 2025-10-29T00:48:28Z + +**State:** MERGED + +**Merged:** 2025-10-29T01:11:36Z + +**Changes:** +5 -3 (4 files) + + +## Description + +### Description +Bump commons-lang3 version to resolve CVE-2025-48924 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4692: [Backport 2.19-dev] [BugFix] Fix unexpected shift of extraction for `rex` with nested capture groups in named groups + +**URL:** https://github.com/opensearch-project/sql/pull/4692 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-29T00:34:56Z + +**State:** MERGED + +**Merged:** 2025-10-29T08:44:16Z + +**Changes:** +274 -63 (7 files) + + +## Description + +Backport 0c1ec27da389be9e434158d67df8dbc993bc45fe from #4641. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4691: [Backport 2.19-dev] Fix asc/desc keyword behavior for sort command + +**URL:** https://github.com/opensearch-project/sql/pull/4691 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-28T23:27:37Z + +**State:** MERGED + +**Merged:** 2025-10-29T00:03:51Z + +**Changes:** +333 -76 (9 files) + + +## Description + +Backport 448c42ac5a38f2089e92520ce62cfd083bf52afc from #4651. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4690: bin command error message enhancement + +**URL:** https://github.com/opensearch-project/sql/pull/4690 + +**Author:** @ahkcs + +**Created:** 2025-10-28T23:02:57Z + +**State:** MERGED + +**Merged:** 2025-10-30T22:54:32Z + +**Changes:** +342 -32 (10 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +When the bin command is applied to a non-numeric field (e.g., string) with numeric span parameters, the command fails with unclear Calcite type validation errors, making it difficult for users to identify the root cause + +This PR Added clear validation in BinFieldValidator that throws SemanticCheckException with message, Added field type checks in all numeric bin handlers before Calcite validation, providing clearer user-facing error messages + +### Related Issues +Partially resolves #4590 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +QQ: what's the expected behavior? In PR description, semantic exception is expected. In issue https://github.com/opensearch-project/sql/issues/4590, the expectation is "the bin command should gracefully skip numeric binning for non-numeric fields and preserve the original field values instead of throwing an error." ? + + +### @ykmr1224 - APPROVED + + +LGTM with minor comment. 
+ + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinFieldValidator.java:None` + + +thought: This is a pretty useful general utility outside of `bin` + +Should we define it closer to where `SqlTypeName` is defined? We have lots of hand-rolled field logic for different commands and it's led to some issues when the type list change (e.g. OpenSearch recently introducing `TIMESTAMP_MICROS`). + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinFieldValidator.java:None` + + +suggestion: Instead, load numeric fields into a data class that directly supports binning operations. + +When we validate with a `void` method, we don't track any validation information in the type system, so we rely on downstream logic to "know" validation was run. It's easy to forget it or unnecessarily run it multiple times. If we instead put this validation in a constructor for a relevant class, and Bin uses this class, then the fact the class exists means we've definitely run validation. + +This adds safety, makes these field operations easier to test, makes useful "numeric-field-only" operations more reusable, and simplifies the bin command's logic. + + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinFieldValidator.java:None` + + +Good suggestion! I've moved the type checking utilities to OpenSearchTypeFactory. Future type additions (like TIMESTAMP_MICROS) will only need updates in one place + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinFieldValidator.java:None` + + +Great suggestion! I've refactored to use a type-safe validation approach by creating a `BinnableField` class that validates in its constructor and updated all 5 bin handlers to use it. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java:61` + + +nit: This method was confusing to me. 
It might be better to change it to `isNumeric` if it is actually used to check if the field is numeric.
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4688: Introduce RelBuilder wrapper for dynamic fields + +**URL:** https://github.com/opensearch-project/sql/pull/4688 + +**Author:** @ykmr1224 + +**Created:** 2025-10-28T17:12:49Z + +**State:** MERGED + +**Merged:** 2025-11-04T17:11:44Z + +**Changes:** +1536 -169 (17 files) + +**Labels:** `enhancement`, `PPL`, `calcite` + +**Assignees:** @ykmr1224 + + +## Description + +This PR is for feature branch `feature/permissive` + +### Description +- Introduce RelBuilderWrapper and RelFieldBuilder to abstract the field operations with dynamic fields. +- RelBuilderWrapper uses delegation to RelBuilder so that it won't allow direct operation on fields without considering dynamic fields. + - It uses delegation instead of inheritance since RelBuilder internally call `field` or `fields` methods and inheritance cannot prohibit access to some methods. + - RelBuilderWrapper hide original field/fields operations, and provide some package private method to allow RelFieldBuilder to access raw operations. +- It leaves fixes for several commands to later PR to avoid this PR becomes too big. + +### Related Issues +Permissive mode RFC: https://github.com/opensearch-project/sql/issues/4349 +Dynamic fields RFC: https://github.com/opensearch-project/sql/issues/4433 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +why not just leverage the [OpenSearchRelBuilder](https://github.com/opensearch-project/sql/blob/main/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java#L179) directly? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +If you want a derived class of RexBuilder, we can create one as well in [OpenSearchPrepareImpl.perform()](https://github.com/opensearch-project/sql/blob/main/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java#L220) + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java:79` + + +`OpenSearchRelBuilder` should be returned here, so if you reuse `OpenSearchRelBuilder`, no need to wrap again. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +As I described in the PR description, inheritance didn't work well since there are some internal access within RelBuilder to the methods we want to prohibit. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java:79` + + +Please see my above comment. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelFieldBuilder.java:59` + + +how about simplify all `staticField` to `field`? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Override all `fields` of raw RelBuilder in OpenSearchRelBuilder cannot work? + +In `CalciteRelNodeVisitor`, all calling to `context.relBuilder.fields/field` are interrupted in `OpenSearchRelBuilder.fields/field`. +But in `calcite.RelBuilder`, all `fields`/`field` will access its raw `fields`/`field` method. <=== do you want to prohibit this? + +Delegation might not work either. 
For example, `RelBuilder.sort()` calls `RelBuilder.field()`. How to prohibit this calling with delegation class? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +> In CalciteRelNodeVisitor, all calling to context.relBuilder.fields/field are interrupted in OpenSearchRelBuilder.fields/field. +> But in calcite.RelBuilder, all fields/field will access its raw fields/field method. <=== do you want to prohibit this? + +No, inheritance won't work like that. If we override fields/field in a subclass, even the call from calcite.RelBuilder will call the method in subclass. + +> Delegation might not work either. For example, RelBuilder.sort() calls RelBuilder.field(). How to prohibit this calling with delegation class? + +The idea is that, if the method won't work as we expect, customize or prohibit (and provide alternative way) the usage in RelBuilderWrapper. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/rel/RelFieldBuilder.java:59` + + +I named it as `staticField` so it would be more explicit to access static field. +We want to avoid someone unintentionally omit the existence of dynamic fields. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +> No, inheritance won't work like that. If we override fields/field in a subclass, even the call from calcite.RelBuilder will call the method in subclass. + +Ok, can this work for you? 
+ +``` +public class TestInheritance { + + static class RelBuilder { + public void fields() { + System.out.println("fields"); + field(); + } + public void field() { + System.out.println("field"); + } + public void otherUnrelated() { + System.out.println("others"); + } + } + + static class DerivedRelBuilder extends RelBuilder { + @Override + public void fields() { + System.out.println("Derived fields"); + super.field(); // change to add super if want to call origin + } + @Override + public void field() { + System.out.println("Derived field"); + } + } + + public static void main(String[] args) { + RelBuilder relBuilder = new DerivedRelBuilder(); + System.out.println("Test fields:"); + relBuilder.fields(); + System.out.println("Test field:"); + relBuilder.field(); + System.out.println("Test unrelated:"); + relBuilder.otherUnrelated(); + } +} +``` + +> Test fields: +> Derived fields +> field +> Test field: +> Derived field +> Test unrelated: +> others + + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Thanks for the further deep dive. +The issue is, some methods in original RelBuilder calls methods which we want to prohibit. How can we selectively allow the call to the same method and delegate to super? +We can do it if we override the method which calls the methods we want to prohibit, but that would require copying the original implementation from Calcite. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +> We can do it if we override the method which calls the methods we want to prohibit + +Now I know why you use a decorator pattern. 
+A simple way is adding an override method to throw an exception in `OpenSearchRelBuilder` +``` + @Deprecated + @Override + public RexInputRef field(String fieldName) { + throw new ForbiddenMethodCall("field() is not supported in OpenSearchRelBuilder"); + } +``` +But it just throws runtime exception, so our UT and IT will help us to avoid using those forbidden methods. To throw compile error, an alternative is creating an annotation with `com.google.errorprone`. + + + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Overall, the simple approach is merging current `RelFieldBuilder` and `OpenSearchRelBuilder`. But It's okey that we keep the current `RelBuilderWrapper`. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Oh, I think we still need to use RelBuilder or its derived class OpenSearchRelBuilder because RelBuilder will be used in Calcite optimization rules. For example, https://github.com/opensearch-project/sql/blob/c9a8f47ea993c95bb6b13023de68ab54ddedba00/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java#L84 + +we do have opportunity to get the OpenSearchRelBuilder from `call.builder()` but no way to get the wrapper. + +How about just add dynamic methods in OpenSearchRelBuilder and keep the original `field` and `fields` as static field methods. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Hi Lantao, +Can you revisit my comment? +I think you haven't understood why inheritance won't work. +If we inherit and override method with throwing exception, internal call from other methods in RelBuilder will also fail. And I don't have good idea to workaround this other than reimplementing the method and call super.field instead, which duplicates the implementation from Calcite and not maintainable. 
+ + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Yes, you are right, in inheritance, if the method calling chain is A() -> B() -> C(). The overridden method A'() will still call the overridden C'(): A'() -> B() -> C'() . + +Can we just add `@Deprecated` and java doc on the override method to callout this method should not be used in the project? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/rel/RelBuilderWrapper.java:36` + + +Just a suggestion @ykmr1224 . I am okey on the current POC, we can postpone the problem about how to restrict the usage of RelBuilder in optimize rules (Calcite internal). + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4687: [Backport 2.19-dev] Enhance dynamic source clause to support metadata only filters + +**URL:** https://github.com/opensearch-project/sql/pull/4687 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-28T16:17:23Z + +**State:** MERGED + +**Merged:** 2025-10-29T08:47:46Z + +**Changes:** +200 -64 (2 files) + + +## Description + +Backport e6eb8081c19ecc65b98125c6bcea9fd3f71f3b4c from #4554. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4686: Update search.rst documentation + +**URL:** https://github.com/opensearch-project/sql/pull/4686 + +**Author:** @vamsimanohar + +**Created:** 2025-10-28T15:49:14Z + +**State:** MERGED + +**Merged:** 2025-10-29T18:46:06Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `documentation`, `backport 2.19-dev` + +**Assignees:** @vamsimanohar + + +## Description + +### Description +Update search.rst documentation to give more information on ip datatypes. 
+ +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @anasalkouz on `docs/user/ppl/cmd/search.rst:127` + + +Maybe we should be more specific. this wildcards IP matching is not supported using IP field + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:127` + + +The above paragraph is under IP fields section only. and the first line says. No wildcards for partial IP matching. + +https://github.com/vamsimanohar/sql/blob/doc/docs/user/ppl/cmd/search.rst#field-types-and-search-behavior + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4685: [Backport 2.19-dev] Support ISO8601-formatted string in PPL (#4246) + +**URL:** https://github.com/opensearch-project/sql/pull/4685 + +**Author:** @yuancu + +**Created:** 2025-10-28T10:10:09Z + +**State:** MERGED + +**Merged:** 2025-10-29T09:11:19Z + +**Changes:** +87 -24 (6 files) + + +## Description + +### Description + +Backport #4246 to 2.19-dev + +### Commit Message + +* Support parsing ISO 8601 datetime format for timestamp value + + + +* Modify tests for ISO 8601 timestamp input + + + +* Add support of iso 8601 date string to date and time + +- add an IT for date time comparison with iso 8601 formatted literal + + + +--------- + + +(cherry picked from commit 42c13b40f2d3e8d7938e1ca95d9cc4fc74fbba5d) + + +### Related Issues +Resolves #4188 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4683: Fix sub-fields accessing of generated structs + +**URL:** https://github.com/opensearch-project/sql/pull/4683 + +**Author:** @yuancu + +**Created:** 2025-10-28T08:02:15Z + +**State:** MERGED + +**Merged:** 2025-11-03T09:12:00Z + +**Changes:** +48 -0 (4 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +This PR fixes a bug where nested fields of generated structs (like those created by the `geoip()` function) were not accessible, returning incorrect field names and null values. + +**Problem** + +When accessing nested fields from generated structs using dot notation, the system would: +1. Return incorrect field names +2. Return null values instead of the actual nested field data +3. Return incorrect types + +**Solution** + +The fix involves two main changes: + +QualifiedNameResolver.java: + - Fixed the joinParts calculation logic when creating item access for nested fields + - Added proper aliasing to preserve the original qualified field name in query results + +**Testing** + +- Added integration tests in CalciteGeoIpFunctionsIT.java to verify the fix +- Added YAML test cases to validate both logical and physical query plans +- Tests confirm that accessing info.dummy_sub_field now returns the correct field name and value + + +### Related Issues +Resolves #4682 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Did your merge upstream action happen before #4716 being merged? + + +--- + +# PR #4680: [Backport 2.19-dev] Update big5 ppl queries and check plans + +**URL:** https://github.com/opensearch-project/sql/pull/4680 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-28T05:54:04Z + +**State:** MERGED + +**Merged:** 2025-10-28T09:03:56Z + +**Changes:** +3175 -108 (152 files) + + +## Description + +Backport 0f453825919178991f5e432941b92a1f8b901d19 from #4668. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4679: [Backport 2.19-dev] Make nested alias type support referring to outer context + +**URL:** https://github.com/opensearch-project/sql/pull/4679 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-28T05:22:00Z + +**State:** MERGED + +**Merged:** 2025-10-28T08:47:35Z + +**Changes:** +98 -29 (6 files) + + +## Description + +Backport fea679ddfb817aa003a1114e2a756ab3d62f08c7 from #4673. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4677: [Backport 2.19-dev] `Bin` command big5 queries + +**URL:** https://github.com/opensearch-project/sql/pull/4677 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-28T03:30:32Z + +**State:** MERGED + +**Merged:** 2025-10-28T05:52:00Z + +**Changes:** +26 -0 (4 files) + + +## Description + +Backport 945235ab886b7fdf2db8b1cc9bbd0d2bf56df798 from #4163. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4676: [Backport 2.19-dev] Support push down sort after limit + +**URL:** https://github.com/opensearch-project/sql/pull/4676 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-28T02:12:23Z + +**State:** MERGED + +**Merged:** 2025-10-28T03:00:47Z + +**Changes:** +89 -32 (13 files) + + +## Description + +Backport d4a2d1974803512fb622f5dd30633842ea663ffc from #4657. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4674: [Backport 2.19-dev] Use table scan rowType in filter pushdown could fix rename issue + +**URL:** https://github.com/opensearch-project/sql/pull/4674 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-27T14:27:57Z + +**State:** MERGED + +**Merged:** 2025-10-28T02:06:15Z + +**Changes:** +70 -2 (2 files) + + +## Description + +Backport 018fc5958288d2b7cadbc9e91f7f88473c9cf594 from #4670. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4673: Make nested alias type support referring to outer context + +**URL:** https://github.com/opensearch-project/sql/pull/4673 + +**Author:** @qianheng-aws + +**Created:** 2025-10-27T10:17:00Z + +**State:** MERGED + +**Merged:** 2025-10-28T05:21:45Z + +**Changes:** +98 -29 (6 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Make nested alias type support referring to outer context + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4559 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4672: Support millisecond span + +**URL:** https://github.com/opensearch-project/sql/pull/4672 + +**Author:** @yuancu + +**Created:** 2025-10-27T08:40:05Z + +**State:** MERGED + +**Merged:** 2025-10-30T02:07:14Z + +**Changes:** +180 -47 (13 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Milliseconds had been improperly converted to microseconds. This PR fixes the issue by incorporating support to milliseconds. + +Additionally, this PR correct a minor flaw of per function implementations so that they work properly with milliseconds. 
+ +### Related Issues +Resolves #4550 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +## Review Comments + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:1110` + + +Can you add some tests for `MICROSECOND` + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml:39` + + +can you add a test for `MICROSECOND`: +``` +source=test_data_2023 | timechart span=500000us count() +``` + + +### @yuancu on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml:39` + + +It seems the minimum date time unit supportted in span function is millisecond: + +- [SpanFunction.java](https://github.com/opensearch-project/sql/blob/484f49ebaa44264b421f54225dc19e2623ae9f85/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java) + ```java + public static Object evalTimestamp( + @Parameter(name = "value") String value, + @Parameter(name = "interval") int interval, + @Parameter(name = "unit") String unit) { + ExprValue exprInterval = ExprValueUtils.fromObjectValue(interval, ExprCoreType.INTEGER); + ExprValue exprValue = ExprValueUtils.fromObjectValue(value, ExprCoreType.TIMESTAMP); + Rounding rounding = new TimestampRounding(exprInterval, unit); + return rounding.round(exprValue).valueForCalcite(); + } + ``` +- then in [Rounding.java](https://github.com/opensearch-project/sql/blob/e4c299a73454531a05656c9fc38ab6e6681b8e4f/core/src/main/java/org/opensearch/sql/planner/physical/collector/Rounding.java) + ```java + DateTimeUnit.resolve(unit) + + ... 
+ + public enum DateTimeUnit { + @Getter private final int id; + @Getter private final String name; + protected final boolean isMillisBased; + protected final long ratio; + + MILLISECOND(1, "ms", true, ChronoField.MILLI_OF_SECOND.getBaseUnit().getDuration().toMillis()) { + @Override + public long round(long utcMillis, int interval) { + return DateTimeUtils.roundFloor(utcMillis, ratio * interval); + } + }, + + SECOND(2, "s", true, ChronoField.SECOND_OF_MINUTE.getBaseUnit().getDuration().toMillis()) { + @Override + public long round(long utcMillis, int interval) { + return DateTimeUtils.roundFloor(utcMillis, ratio * interval); + } + }, + ... + } + ``` + +`DateTimeUnit` is built based on milliseconds. Its `ratio` attribute is of type `long`, thus cannot represent a fraction of a millisecond. + +Should I refactor this part to extend its support to microsecond and nanosecond? + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml:39` + + +What does the current user doc say? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:507` + + +If we want to expose microsecond span support, we need to update all docs related? Also we have to change `per_*` function to microsecond-based, otherwise `timechart span=1us per_second...` will break? + +I'm thinking shall we separate the PR, e.g., fix mismatch of microsecond and millisecond in this PR and decide whether to expose all subsecond to PPL span clause later? 
+ + +### @yuancu on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml:39` + + +In [the doc for stats command](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/stats.rst), it claims that up to milliseconds is supported: + + +| Span Interval Units | +|----------------------| +| millisecond (ms) | +| second (s) | +| minute (m, case sensitive) | +| hour (h) | +| day (d) | +| week (w) | +| month (M, case sensitive) | +| quarter (q) | +| year (y) | + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml:39` + + +Ok, we can file a new PR to support `MICROSECOND` , as well as `source=test_data_2023 | timechart span=500000us count()` + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:507` + + +It seems microseconds are not supported by span yet. + +I'm wondering if we are going to expand the support to microseconds, as nanoseconds are also supported in PPL, should we also do it for nanoseconds? + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/ast/expression/SpanUnit.java:19` + + +question: `μs`? Why fear unicode in the strings? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:507` + + +I'm under the same impression for `span` with `timechart` command. Probably we can track this as separate task and don't need to make more changes for span and per_* function here. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/expression/SpanUnit.java:19` + + +That's a good point. I didn't think of it. I used `us` because it was already used to represent microseconds in the grammar file: +https://github.com/opensearch-project/sql/blob/fcff083fd1a8b15686c22cf7decb80ec6fd620fd/ppl/src/main/antlr/OpenSearchPPLLexer.g4#L505 + +Besides, `us` has the merit in its convenience of typing; SPL also adopts `us` for microseconds: +``` +Error in 'timechart' command: The value for option span (1μs) is invalid. 
When span is expressed using a sub-second unit (ds, cs, ms, us), the span value needs to be < 1 second, and 1 second must be evenly divisible by the span value. +``` + +Therefore, I think it's acceptable to adopt `us` for our use case. But of course, we can extend the support to `μs`. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/expression/SpanUnit.java:19` + + +It occurs to me that `cs` and `ds` are not well supported in span either. I'll raise another issue for it. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4670: Use table scan rowType in filter pushdown could fix rename issue + +**URL:** https://github.com/opensearch-project/sql/pull/4670 + +**Author:** @LantaoJin + +**Created:** 2025-10-27T07:13:35Z + +**State:** MERGED + +**Merged:** 2025-10-27T08:55:19Z + +**Changes:** +70 -2 (2 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Use table scan rowType in filter pushdown could fix rename issue + +### Related Issues +Resolves #4563 +Resolves #4664 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4669: [Backport 2.19-dev] Fix: Support Alias Fields in MIN, MAX, FIRST, LAST, and TAKE Aggregations + +**URL:** https://github.com/opensearch-project/sql/pull/4669 + +**Author:** @ahkcs + +**Created:** 2025-10-27T01:23:46Z + +**State:** MERGED + +**Merged:** 2025-10-27T21:15:15Z + +**Changes:** +355 -146 (37 files) + + +## Description + +Fix: Support Alias Fields in MIN, MAX, FIRST, LAST, and TAKE Aggregations (#4621) + +(cherry picked from commit c6a3a70ac0d0e3ae3aa5eaefba88dab000bc70d5) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4668: Update big5 ppl queries and check plans + +**URL:** https://github.com/opensearch-project/sql/pull/4668 + +**Author:** @LantaoJin + +**Created:** 2025-10-26T06:01:31Z + +**State:** MERGED + +**Merged:** 2025-10-28T02:36:31Z + +**Changes:** +3175 -108 (152 files) + +**Labels:** `bug`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Update big5 ppl queries: +- Add original DSL as comment in ppl files +- Add a new folder `big5/queries/optimized/`: `CalcitePPLBig5IT` first loads the ppl file from this folder. If it cannot be found, it loads the ppl file from `big5/queries/`. + * `stats` in this folder will set `bucket_nullable = false` to align with related DSL. + * queries in this folder can be pushed to new optimizations such as `auto-date-histogram`, `range` and `multi-terms` bucket aggregation. 
+- Add missing big5 queries: + * [cardinality_agg_high.ppl](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/big5/operations/default.json#L886) + * [cardinality_agg_high_2.ppl](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/big5/operations/default.json#L904) + * [cardinality_agg_low.ppl](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/big5/operations/default.json#L871) + * [range_agg_1.ppl](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/big5/operations/default.json#L656) + * [range_agg_2.ppl](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/big5/operations/default.json#L695) +- Align big5 ppl queries to dsl in operations, mentioned in https://github.com/opensearch-project/sql/pull/4198 +- Fix some incorrect ppl queries: + * [multi_terms_keyword.ppl](https://github.com/opensearch-project/sql/pull/4668/files#diff-1876271928fc1ed301868c0e2dfd7f26af27db8ef921264ed67bd9b971ea6650) should add `head 10` + * [query_string_on_message.ppl](https://github.com/opensearch-project/sql/pull/4668/files#diff-5c2771fa50710829e42b638feaa6e79f33bfc0854ce5505f1ccef25f265eee09) should add `OR`. +- Add plan checks for big5 queries. + +### Related Issues +Resolves #4667 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +Backport to 2.19-dev. + + +## Review Comments + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java:427` + + +why have optimized folder? it required calcite enable? +IMO, PPL big5 query should be optimized queries. 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java:427` + + +That's because of 1. the bucket_nullable argument, 2. the bin command + + +## General Comments + + +### @LantaoJin + + +@noCharger please view this update and we might need to update our benchmark as well. + + +### @penghuo + + +For queries defined in big5/queries/optimized/, should we update them in the opensearch-benchmark repo? + + +### @LantaoJin + + +> For queries defined in big5/queries/optimized/, should we update them in the opensearch-benchmark repo? + +Yes. cc @noCharger and @xinyual + + +--- + +# PR #4665: [Maintenance] Fix CVE-2025-48924 + +**URL:** https://github.com/opensearch-project/sql/pull/4665 + +**Author:** @RyanL1997 + +**Created:** 2025-10-25T23:26:19Z + +**State:** MERGED + +**Merged:** 2025-10-29T20:17:55Z + +**Changes:** +3 -0 (1 files) + +**Labels:** `maintenance`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix CVE-2025-48924 + +The problematic dependency happens to be in a diamond dependency situation: + +```bash + ├── commons-lang3:3.18.0 <- correct version already + └── calcite-core:1.38.0 + └── aggdesigner-algorithm:6.0 + └── commons-lang:2.4 <- reported version +``` + +### Related Issues +* Resolve https://advisories.opensearch.org/advisory/CVE-2025-48924 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Both `common-lang3` 3.18 and `common-lang` 2.6+ can fix the CVE right? Is current change enforcing common-lang3 which rename its root package? 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +> Both common-lang3 3.18 and common-lang 2.6+ can fix the CVE right? Is current change enforcing common-lang3 which rename its root package? + +Hi @dai-chen , + +You're correct that there are no patched versions in the commons-lang 2.x series (the advisory shows "Patched versions: None" for `commons-lang 2.0-2.6`). The only fix is upgrading to `commons-lang3 3.18.0+`. + +Yes, our dependency substitution does enforce `commons-lang3` which uses the renamed `org.apache.commons.lang3.*` package structure, but our testing confirms this works correctly. Since there's no way to patch the old `commons-lang 2.x` and we can't control what `aggdesigner-algorithm` depends on, dependency substitution to `commons-lang3 3.18.0` is the approach to resolve this. + + + +### @LantaoJin + + +@RyanL1997 can you explain how this problem happens again? I had excluded the commons-lang in https://github.com/opensearch-project/sql/pull/3895/files + + +### @RyanL1997 + + +> @RyanL1997 can you explain how this problem happens again? I had excluded the commons-lang in [#3895 (files)](https://github.com/opensearch-project/sql/pull/3895/files) + +Hi @LantaoJin, in the PR you mentioned: +```gradle +// Force commons-lang3 version when requested +resolutionStrategy.force 'org.apache.commons:commons-lang3:3.18.0' + +// Exclude commons-lang from calcite-core specifically +exclude group: 'commons-lang', module: 'commons-lang' +``` + +The fix you did works for direct dependencies and the calcite-core case you identified. However, `resolutionStrategy.force` only applies when `commons-lang3` is specifically requested, and the `exclude` only removes `commons-lang` from that one dependency. (In other word, the case I mentioned in this PR description will not be covered.) 
+ +Based on your previous fix, mine provides global substitution - ANY request for `commons-lang:commons-lang` from anywhere in the transitive dependency tree gets automatically replaced with the safe version of `commons-lang3:3.18.0`. + +So your fix handled the known cases, and the substitution acts as a safety net for any other transitive dependencies that might still pull in the vulnerable 2.x version. Both pieces were needed for this complete fix. + + + + +--- + +# PR #4663: [Backport 2.19-dev] Fix bin nested fields issue + +**URL:** https://github.com/opensearch-project/sql/pull/4663 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-24T21:32:26Z + +**State:** MERGED + +**Merged:** 2025-10-26T11:36:09Z + +**Changes:** +94 -1 (2 files) + + +## Description + +Backport 85dc8d9eb655155b6790c20373cd4cb51275711a from #4606. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4662: [Backport 2.19-dev] Add `per_minute`, `per_hour`, `per_day` function support + +**URL:** https://github.com/opensearch-project/sql/pull/4662 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-24T16:49:51Z + +**State:** MERGED + +**Merged:** 2025-10-24T20:21:20Z + +**Changes:** +402 -10 (9 files) + +**Labels:** `enhancement`, `PPL` + + +## Description + +Backport 435e4b2210326a1951d43a3c0dfdbe4a5c69e152 from #4531. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4658: [Backport 2.19-dev] Pushdown sort aggregate metrics (#4603) + +**URL:** https://github.com/opensearch-project/sql/pull/4658 + +**Author:** @LantaoJin + +**Created:** 2025-10-24T09:51:16Z + +**State:** MERGED + +**Merged:** 2025-10-27T02:33:38Z + +**Changes:** +1253 -551 (41 files) + + +## Description + +(cherry picked from #4603 commit 5ebed8465dc9d498602a25b802207d63ee5030c8) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4657: Support push down sort after limit + +**URL:** https://github.com/opensearch-project/sql/pull/4657 + +**Author:** @qianheng-aws + +**Created:** 2025-10-24T08:42:21Z + +**State:** MERGED + +**Merged:** 2025-10-27T15:57:04Z + +**Changes:** +89 -32 (13 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Support push down sort after limit if there is only limit pushed, since neither PPL nor other databases promise any ordering for a limit-only operator. Thus it should be acceptable to transform `limit + sort` to `sort + limit`. + +However, we should avoid pushing down `sort` if there is an existing `sort` before `limit`. In such a case, users intend to retrieve the Top-K values on the first sort fields from our index. The order will be overridden by the second `sort` if we keep pushing it down. So this PR prevents this case by detecting whether `top-k` is pushed down already. + +Since https://github.com/opensearch-project/sql/pull/4501 has always introduced a limit before sort for join or subsearch, which will block sort push down. 
This PR will also enhance these scenarios, especially for left join -- both sides will have limit and sort pushed down then. See `CalciteExplainIT::testExplainScalarCorrelatedSubqueryInSelect` + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4570 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java:258` + + +[nit] Better to check NoPushDownIT case. We may need different branch for these two test cases + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchSortIndexScanRule.java:56` + + +can we add a case for below ppl? +``` +source = t | head 100 | stats count() as cnt | sort cnt +``` +The `sort cnt` must not be pushed down through `head 100`. + +Cannot for following ppl either. +``` +source = t | head 100 | eval rand = rand() | sort rand +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchSortIndexScanRule.java:56` + + +Any `sort` expression evaluated after `limit` cannot pushed through `limit`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchSortIndexScanRule.java:56` + + +Both cases cannot push the sort and it's not related to whether there is limit in the PPL query. Their no push down reason are: +1. We cannot push down metric sort into agg unless the agg is nullable=false +2. We don't support script push down for sort currently. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchSortIndexScanRule.java:56` + + +could you add these cases in explain IT: +1. It's not the case of pushdown sort agg metrics due to there is no bucket. +2. got it. but better to add a explain IT. + + +## General Comments + + +### @LantaoJin + + +High level question: At the beginning, I thought you are targeting to push down a `sort` through `sysLimit`, so I renamed the PR title. But from the description, it seems a general purpose to push down the `sort` through `limit`? + + +### @qianheng-aws + + +> High level question: At the beginning, I thought you are targeting to push down a `sort` through `sysLimit`, so I renamed the PR title. But from the description, it seems a general purpose to push down the `sort` through `limit`? + +Yes, this a general PR for enhancement not only bug fix. + +In the process of finding solution to the issue https://github.com/opensearch-project/sql/issues/4570, I found it hard to only fix that specific issue because `syslimit` has already been replaced with calcite's `logicalsort` before optimizing. So we're not able to do anything with the `systemlimit` operator we added. + +But with this general enhancement, the original issue can also been addressed. + + +--- + +# PR #4656: [Backport 2.19-dev] [SQL/PPL] Fix the `count(*)` and `dc(field)` to be capped at MAX_INTEGER #4416 + +**URL:** https://github.com/opensearch-project/sql/pull/4656 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-24T08:36:46Z + +**State:** MERGED + +**Merged:** 2025-10-28T05:53:01Z + +**Changes:** +138 -125 (16 files) + + +## Description + +Backport d7b2c3548f748204e4a67f05bc44f1f69617dc97 from #4418. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4655: [Backport 2.19-dev] Followup: Change ComparableLinkedHashMap to compare Key than Value + +**URL:** https://github.com/opensearch-project/sql/pull/4655 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-24T06:27:07Z + +**State:** MERGED + +**Merged:** 2025-10-24T08:26:26Z + +**Changes:** +61 -36 (2 files) + + +## Description + +Backport d71967069d7396a7727b7417b8eb2899f48b319b from #4648. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4654: [Backport 2.19-dev] Mitigate the CI failure caused by 500 Internal Server Error (#4646) + +**URL:** https://github.com/opensearch-project/sql/pull/4654 + +**Author:** @LantaoJin + +**Created:** 2025-10-24T04:37:46Z + +**State:** MERGED + +**Merged:** 2025-10-24T05:34:57Z + +**Changes:** +20 -7 (4 files) + + +## Description + +(cherry picked from #4646 commit 2cb1d7349a13237fab3c084ca62e170a391f1927) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4653: [Backport 2.19-dev] Allow renaming group-by fields to existing field names + +**URL:** https://github.com/opensearch-project/sql/pull/4653 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-24T02:35:08Z + +**State:** MERGED + +**Merged:** 2025-10-28T05:51:01Z + +**Changes:** +147 -1 (2 files) + + +## Description + +Backport a86a5a75efcc310f0e4ef0c6352c2a3ea55f18d1 from #4586. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4652: [AUTO] Increment version to 3.3.2-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/4652 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-24T00:09:17Z + +**State:** MERGED + +**Merged:** 2025-10-27T21:29:12Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `maintenance`, `v3.3.2` + + +## Description + +- Incremented version to **3.3.2-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @peterzhuamazon + + +Pending artifacts build for js zip. + + +--- + +# PR #4651: Fix asc/desc keyword behavior for sort command + +**URL:** https://github.com/opensearch-project/sql/pull/4651 + +**Author:** @ritvibhatt + +**Created:** 2025-10-23T23:42:46Z + +**State:** MERGED + +**Merged:** 2025-10-28T23:27:25Z + +**Changes:** +333 -76 (9 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Fixed PPL sort command so that asc/desc keywords specify sort direction for individual fields instead of applying all fields in the sort command. (keywords work on individual fields ex ```sort field1 asc, field2 desc``` means field1 is ascending and field2 is descending) while preventing mixing of prefix (+/-) and suffix (asc/desc) syntax, and updated integration tests to match this behavior. 
+ +Files Changed + +- ppl/src/main/antlr/OpenSearchPPLParser.g4: Updated grammar rules for sort command to support individual field ASC/DESC keywords +- ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java: Updated visitSortCommand() method to handle new ASC/DESC field syntax and prevent mixing of +/-and asc/desc sort syntax +- ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java: Modified expression building logic for sort field parsing and ensure consistent sort direction syntax usage across all sort fields +- ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java: Updated getSortArguments() method to process new field-level ASC/DESC syntax + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Hi @ritvibhatt , thanks for the change, and I left some minor comments. + + +## Review Comments + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:822` + + +Why do we need to define invalid syntax? wouldn't it fail parse without this? + + +### @ykmr1224 on `docs/user/ppl/cmd/sort.rst:28` + + +Do we need to prohibit this? Any concern to allow the mixture? + + +### @ykmr1224 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:263` + + +It is not critical, but please improve if you have capacity. +Following logics looks weird to me since visitPrefixSortField / visitSuffixSortField / visitDefaultSortField already know their type, but come to the same place and branch based on the context class. 
+I don't have clear idea to improve it, but I think there are better way to organize the code. + + +### @ritvibhatt on `ppl/src/main/antlr/OpenSearchPPLParser.g4:822` + + +It would fail the parse but added this to have a more clear error for when the two syntaxes are mixed + + +### @ritvibhatt on `docs/user/ppl/cmd/sort.rst:28` + + +Added because the syntax gets confusing if there's a lot of mixing of the 2 different syntaxes + + +### @ritvibhatt on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:263` + + +Yeah it is a bit confusing, I can take this up as a task in a follow up PR to improve the logic here + + +### @RyanL1997 on `docs/user/ppl/cmd/sort.rst:90` + + +nit: show -> shows + + +### @RyanL1997 on `ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java:179` + + +```suggestion +private static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionContext ctx) { + if (ctx.AUTO() != null) return createTypeArgument("auto"); + if (ctx.IP() != null) return createTypeArgument("ip"); + if (ctx.NUM() != null) return createTypeArgument("num"); + if (ctx.STR() != null) return createTypeArgument("str"); + return createTypeArgument(null); +} + +private static Argument createTypeArgument(String value) { + DataType dataType = value != null ? 
DataType.STRING : DataType.NULL; + return new Argument("type", new Literal(value, dataType)); +} +``` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4650: [Backport 2.19-dev] Support Automatic Type Conversion for REX/SPATH/PARSE Command Extraction (#4599) + +**URL:** https://github.com/opensearch-project/sql/pull/4650 + +**Author:** @penghuo + +**Created:** 2025-10-23T18:38:15Z + +**State:** MERGED + +**Merged:** 2025-10-23T21:52:43Z + +**Changes:** +635 -96 (11 files) + +**Labels:** `enhancement`, `PPL` + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/6f028a3ef18b92ecd1769e788fe867bdd74e3544 from https://github.com/opensearch-project/sql/pull/4599. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4649: [Backport 2.19-dev] Replace all dots in fields of table scan's PhysType + +**URL:** https://github.com/opensearch-project/sql/pull/4649 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-23T08:27:34Z + +**State:** MERGED + +**Merged:** 2025-10-24T05:40:31Z + +**Changes:** +238 -1 (4 files) + + +## Description + +Backport 8854488af17f482fca832f9ebfb3657c2a42c41f from #4633. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4648: Followup: Change ComparableLinkedHashMap to compare Key than Value + +**URL:** https://github.com/opensearch-project/sql/pull/4648 + +**Author:** @LantaoJin + +**Created:** 2025-10-23T08:08:37Z + +**State:** MERGED + +**Merged:** 2025-10-24T06:17:19Z + +**Changes:** +61 -36 (2 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Followup of https://github.com/opensearch-project/sql/pull/4629: `ComparableLinkedHashMap` as an util class, better to change it to compare key than value instead of values only. + +### Related Issues +Resolves #4339 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +### @qianheng-aws - COMMENTED + + +Can you elaborate more on the scenario of comparing key? + +I'm guessing whether it's for the case that different rows get different schema, like +``` +row1: {"c": 1, "a": 1, "b": 1} +row2: {"b": "2", "a": 1} +``` + +However, for this case, shouldn't we sort the map by their key first separately and then do comparison between them? 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4647: [Backport 2.19-dev] Return comparable LinkedHashMap in `valueForCalcite()` of ExprTupleValue + +**URL:** https://github.com/opensearch-project/sql/pull/4647 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-23T07:17:03Z + +**State:** MERGED + +**Merged:** 2025-10-24T05:40:57Z + +**Changes:** +402 -1 (4 files) + + +## Description + +Backport 80fb6b75d75e0d13e752d65b8bd9a3f8480dcca3 from #4629. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4646: Mitigate the CI failure caused by 500 Internal Server Error + +**URL:** https://github.com/opensearch-project/sql/pull/4646 + +**Author:** @LantaoJin + +**Created:** 2025-10-23T06:22:31Z + +**State:** MERGED + +**Merged:** 2025-10-24T04:31:37Z + +**Changes:** +19 -4 (4 files) + +**Labels:** `infrastructure`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Today, the CI always fails with +``` +FAILURE: Build failed with an exception. + +* What went wrong: +Could not determine the dependencies of task ':opensearch-sql-plugin:shadowJar'. +> Could not resolve all dependencies for configuration ':opensearch-sql-plugin:runtimeClasspath'. + > Could not resolve com.github.babbel:okhttp-aws-signer:1.0.2. + Required by: + project :opensearch-sql-plugin > project :ppl > project :common + > Could not resolve com.github.babbel:okhttp-aws-signer:1.0.2. + > Could not get resource 'https://ci.opensearch.org/ci/dbc/snapshots/maven/com/github/babbel/okhttp-aws-signer/1.0.2/okhttp-aws-signer-1.0.2.pom'. + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/maven/com/github/babbel/okhttp-aws-signer/1.0.2/okhttp-aws-signer-1.0.2.pom'. 
Received status code 403 from server: Forbidden + + > Could not resolve com.github.babbel:okhttp-aws-signer:1.0.2. +Deprecated Gradle features were used in this build, making it incompatible with Gradle 9.0. + > Could not get resource 'https://ci.opensearch.org/ci/dbc/snapshots/lucene/com/github/babbel/okhttp-aws-signer/1.0.2/okhttp-aws-signer-1.0.2.pom'. + + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/lucene/com/github/babbel/okhttp-aws-signer/1.0.2/okhttp-aws-signer-1.0.2.pom'. Received status code 403 from server: Forbidden +You can use '--warning-mode all' to show the individual deprecation warnings and determine if they come from your own scripts or plugins. + > Could not resolve com.github.babbel:okhttp-aws-signer:1.0.2. + + > Could not get resource 'https://jitpack.io/com/github/babbel/okhttp-aws-signer/1.0.2/okhttp-aws-signer-1.0.2.pom'. +For more on this, please refer to https://docs.gradle.org/8.14/userguide/command_line_interface.html#sec:command_line_warnings in the Gradle documentation. + > Could not GET 'https://jitpack.io/com/github/babbel/okhttp-aws-signer/1.0.2/okhttp-aws-signer-1.0.2.pom'. Received status code 500 from server: Internal Server Error +3 actionable tasks: 3 executed +``` + +Add build cache to mitigate the failures when download `'https://jitpack.io'` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `build.gradle:97` + + +Does this mean only downloading "com.github.babbel" from source 'https://jitpack.io'? 
+ + +### @LantaoJin on `build.gradle:97` + + +yes + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4643: Revert "Update grammar files and developer guide (#4301)" + +**URL:** https://github.com/opensearch-project/sql/pull/4643 + +**Author:** @ahkcs + +**Created:** 2025-10-22T23:41:46Z + +**State:** MERGED + +**Merged:** 2025-10-23T17:55:54Z + +**Changes:** +387 -743 (3 files) + +**Labels:** `maintenance` + + +## Description + +This reverts commit 00ea65e3f3adee9d13e1bdb4f4b7681d038d674c. + +### Description +The updated grammar files are blocking CI from spark repo as the spark repo is not ready for the latest grammar changes from SQL repo; I am reverting this change to unblock CI on spark repo first. + +### Related +* Relate #4301 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - APPROVED + + +Hi @ahkcs , could you also update the PR description by following the template? What was the original PR associated with that commit? 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +CI is flaky + + +--- + +# PR #4641: [BugFix] Fix unexpected shift of extraction for `rex` with nested capture groups in named groups + +**URL:** https://github.com/opensearch-project/sql/pull/4641 + +**Author:** @RyanL1997 + +**Created:** 2025-10-22T21:51:51Z + +**State:** MERGED + +**Merged:** 2025-10-29T00:34:42Z + +**Changes:** +274 -63 (7 files) + +**Labels:** `bug`, `PPL`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Fix unexpected shift of extraction for `rex` with nested capture groups in named groups + +The rex command in PPL had a critical bug when using named capture groups that contained nested unnamed groups. This caused extracted field values to shift by one position, producing incorrect results. +- **Root Cause:** Code used sequential indices `1`, `2`, `3`... but nested groups create non-sequential indices `1`, `3`, `5`... +- **Solution:** Bypass index calculation entirely by using Java's native named group extraction (`matcher.group(groupName)`) + + +### Example of the Bug + +Query: +```bash +curl -X POST "localhost:9200/_plugins/_ppl" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "source=accounts | rex field=email \"(?(amber|hattie|nanette)[a-z]*)@(?(pyrami|netagy|quility))\\.(?(com|org))\" | fields user, domain, tld | head 1" + }' +``` + +**Expected Result (correct):** +```bash +["amberduke", "pyrami", "com"] +``` + +**Actual Result (wrong):** +```bash +["amberduke", "amber", "pyrami"] + +``` + +### Root Cause + +When Java's regex engine processes the pattern `(?(amber|hattie))[a-z]*`, it assigns group numbers to ALL capture groups (named and unnamed): + +Pattern: `(?(amber|hattie))[a-z]*@(?(pyrami|netagy))\.(?(com|org))` + +Group Assignment: +- Group 0: Entire match +- Group 1: `(?...)` ← Named group "user" +- Group 2: `(amber|hattie)` ← Unnamed nested group +- Group 3: `(?...)` ← Named group 
"domain" +- Group 4: `(pyrami|netagy)` ← Unnamed nested group +- Group 5: `(?...)` ← Named group "tld" +- Group 6: `(com|org)` ← Unnamed nested group + +Before the fix, the bug is in [`CalciteRelNodeVisitor.java` (lines 265-321)](https://github.com/opensearch-project/sql/blob/c6a3a70ac0d0e3ae3aa5eaefba88dab000bc70d5/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java#L265-L321). The code does: +```java + List namedGroups = RegexCommonUtils.getNamedGroupCandidates(patternStr); + // namedGroups = ["user", "domain", "tld"] + + for (int i = 0; i < namedGroups.size(); i++) { + extractCall = PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.REX_EXTRACT, + fieldRex, + context.rexBuilder.makeLiteral(patternStr), + context.relBuilder.literal(i + 1)); // ← WRONG: Assumes sequential named groups + // ... + } +``` +The code assumes named groups are at indices `1`, `2`, `3`, ... but the actual indices are `1`, `3`, `5`, ... due to the unnamed nested groups. + +With the above buggy logic: +- `REX_EXTRACT(field, pattern, 1)` → Gets Group 1 `(?...)` = "amberduke" → CORRECT +- `REX_EXTRACT(field, pattern, 2)` → Gets Group 2 `(amber|hattie)` = "amber" → WRONG +- `REX_EXTRACT(field, pattern, 3)` → Gets Group 3 `(?...)` = "pyrami" → WRONG + +The second and third extractions are off by one group because they hit the unnamed nested groups. + +```bash +LogicalProject( +user=[REX_EXTRACT($7, '(?(amber|hattie))[a-z]*@(?(pyrami|netagy))\.(?(com|org))', 1)], +domain=[REX_EXTRACT($7, '(?(amber|hattie))[a-z]*@(?(pyrami|netagy))\.(?(com|org))', 2)], -- Wrong! + tld=[REX_EXTRACT($7, '(?(amber|hattie))[a-z]*@(?(pyrami|netagy))\.(?(com|org))', 3)] -- Wrong! +) +``` + +### Related Issues +* Resolve https://github.com/opensearch-project/sql/issues/4466 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/udf/RexExtractMultiFunction.java:164` + + +I'm currently thinking about adding an error handling here + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:300` + + +I found there is a `namedGroups()` API in `Matcher` (since JDK 20?). If we can get correct index here, we don't need to modify the UDFs below? Alternatively we can move capture name -> index logic here from UDFs? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:300` + + +That is correct - the core issue is matching named groups to their correct indices, and `Pattern.namedGroups()` would be the perfect solution. However, I discovered that we're blocked by a +compatibility constraint: + +- `Pattern.namedGroups()` was introduced in JDK 20 +- We need backward compatibility with JDK 11/17 - for `2.19-dev` + +I agree that directly leveraging the `Pattern.namedGroups()` is the right architectural approach - we should definitely migrate to it when we fully upgrade to JDK 20+. At that point, it would be a simple one-line change in `CalciteRelNodeVisitor`. 
+ + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:285` + + +as for now, I added a `TODO` here @dai-chen + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4640: [Backport 2.19-dev] Refactor JsonExtractAllFunctionIT and MapConcatFunctionIT (#4623) + +**URL:** https://github.com/opensearch-project/sql/pull/4640 + +**Author:** @ykmr1224 + +**Created:** 2025-10-22T19:07:37Z + +**State:** MERGED + +**Merged:** 2025-10-24T05:48:49Z + +**Changes:** +2 -188 (4 files) + +**Assignees:** @ykmr1224 + + +## Description + +Backport [e10997e](https://github.com/opensearch-project/sql/commit/e10997e54cacceeb0786e49bc04768b86be667d1) from https://github.com/opensearch-project/sql/pull/4623 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4639: [Backport 3.3] Onboarding new maven snapshots publishing to s3 (sql) + +**URL:** https://github.com/opensearch-project/sql/pull/4639 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-22T18:44:38Z + +**State:** MERGED + +**Merged:** 2025-10-22T18:45:24Z + +**Changes:** +23 -20 (6 files) + + +## Description + +Backport 279eb677dfdcc9bbceb4859f6d324ef77f5a232d from #4588. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4638: Fix Compile on 2.19-dev branch + +**URL:** https://github.com/opensearch-project/sql/pull/4638 + +**Author:** @ahkcs + +**Created:** 2025-10-22T18:41:17Z + +**State:** MERGED + +**Merged:** 2025-10-22T21:58:08Z + +**Changes:** +2 -2 (1 files) + + +## Description + +### Description +Fix compile issue introduced by #4604 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4633: Replace all dots in fields of table scan's PhysType + +**URL:** https://github.com/opensearch-project/sql/pull/4633 + +**Author:** @LantaoJin + +**Created:** 2025-10-22T11:00:15Z + +**State:** MERGED + +**Merged:** 2025-10-23T08:27:15Z + +**Changes:** +238 -1 (4 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Seems a bug of Calcite codegen in `PhysTypesImpl.project()`: +``` +builder.add(rowType.getFieldList().get(index)); +``` +This will add `endpoint.ip` as a variable in generated Java code: +``` +public static class Record2_1 implements java.io.Serializable { + public Integer endpoint.ip; // <== issue here + public String account_id; + public Record2_1() {} + ... +``` + +### Related Issues +Resolves #4619 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java:None` + + +nit: Let's write comment in English + + +### @LantaoJin on `opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java:None` + + +Sure, it was generated by GPT, deleted. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java:264` + + +[nit]seems can be merged with below code + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4630: [Backport 2.19-dev] Pushdown case function in aggregations as range queries (#4400) + +**URL:** https://github.com/opensearch-project/sql/pull/4630 + +**Author:** @yuancu + +**Created:** 2025-10-22T07:01:28Z + +**State:** MERGED + +**Merged:** 2025-10-24T08:45:45Z + +**Changes:** +2222 -162 (42 files) + + +## Description + +## Description + +Backport #4400 to 2.19-dev + +## Commit Message + +* WIP: implementing case range analyzer + + + +* Correct case analyzer + + + +* Create bucket aggregation parsers that supports parsing nested sub aggregations + + + +* Fix unit tests + + + +* Fix parsers to multi-range cases + + + +* Update leaf bucket parser + + + +* Unit test case range analyzer + + + +* Add explain ITs for pushing down case in aggregations + + + +* Update CaseRangeAnalyzerTest + + + +* Add a yaml test that replicates issue 4201 + + + +* Add integration tests for case in aggregation + + + +* Fix unit tests + + + +* Add a patch to CalcitePPLCaseFunctionIT + + + +* Migrate all composite aggregation parser usage to bucket aggregate parser + + + +* Create a parent abstract classes for BucketAggregationParsers + + + +* Remove an unnecessary bucket agg in AggregationQueryBuilder + + + +* Test pushing down case where there exists null values + + + +* Return empty in CaseRangeAnalyzer to unblock the rest pushdown +- 
Additionally test number as result expressions + + + +* Document limitations of pushding case as range queries + + + +* Make case pushdown a private method + + + +* Chores: remove unused helper method + + + +* Unify logics for creating nested aggregations + + + +* Remove a note in condition.rst + + + +* Optmize range aggregation + + + +* Ignore testNestedAggregationsExplain when pushdown is disabled + + + +* Fix explain ITs after merge + + + +--------- + + +(cherry picked from commit 18ab4dc9a02589e4348d67fa4b38a66d27ad7f59) + + +### Related Issues +Resolves #4201 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4629: Return comparable LinkedHashMap in `valueForCalcite()` of ExprTupleValue + +**URL:** https://github.com/opensearch-project/sql/pull/4629 + +**Author:** @LantaoJin + +**Created:** 2025-10-22T06:56:39Z + +**State:** MERGED + +**Merged:** 2025-10-23T07:16:42Z + +**Changes:** +402 -1 (4 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Fix `class java.util.LinkedHashMap cannot be cast to class java.lang.Comparable` in `dedup` struct field + +### Related Issues +Resolves #4339 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/data/utils/ComparableLinkedHashMap.java:None` + + +Only comparing the first element? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/data/utils/ComparableLinkedHashMap.java:None` + + +changed to compare recursively + + +### @yuancu on `core/src/test/java/org/opensearch/sql/utils/ComparableLinkedHashMapTest.java:62` + + +Can the keys of ExprTupleValues be ignored? Or are they guaranteed to come with the same order? + + +## General Comments + + +### @LantaoJin + + +@qianheng-aws I filed a followup [PR](https://github.com/opensearch-project/sql/pull/4648). The current ComparableLinkedHashMap only compares the values. Change it to compare key than value. + + +--- + +# PR #4628: [Backport 2.19-dev] Optimize pushdown script size with necessary fields per expression (#4615) + +**URL:** https://github.com/opensearch-project/sql/pull/4628 + +**Author:** @songkant-aws + +**Created:** 2025-10-22T06:54:53Z + +**State:** MERGED + +**Merged:** 2025-10-22T09:03:30Z + +**Changes:** +482 -146 (47 files) + + +## Description + +### Description +Backport #4615 to 2.19-dev branch. + +### Related Issues + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4627: [Backport 2.19-dev] Update GEOIP function to support IP types as input (#4613) + +**URL:** https://github.com/opensearch-project/sql/pull/4627 + +**Author:** @yuancu + +**Created:** 2025-10-22T06:52:17Z + +**State:** MERGED + +**Merged:** 2025-10-23T03:07:42Z + +**Changes:** +74 -29 (5 files) + + +## Description + +### Description +Backport #4613 to 2.19-dev + +(cherry picked from commit bdd42bc0aea3c2a34b8d5cf4f4a5a80a8177f9b2) + +### Related Issues +Resolves #4468 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4626: [Backport 2.19-dev] Support serializing & deserializing UDTs when pushing down scripts (#4245) + +**URL:** https://github.com/opensearch-project/sql/pull/4626 + +**Author:** @yuancu + +**Created:** 2025-10-22T05:20:41Z + +**State:** MERGED + +**Merged:** 2025-10-22T06:31:56Z + +**Changes:** +1521 -14 (16 files) + + +## Description + +# Description + +Backport #4245 to 2.19-dev + +# Commit Message + +* Support serializing & deserializing UDTs + + + +* Update explain ITs + + + +* Push down UDT types as string types for comparison operators + + + +* Separate test cases and add an ignored IT + + + +* Correct the handling of UDT in CalciteScriptEngine by substituting calcite's type factory with OpenSearchTypeFactory + + + +* Fix deserialization for IP + + + +* Remove testExplainPushDownScriptsContainingUDT in v2 + + + +* Enable testLimitAfterAggregation in CalcitePPLAggregationIT + + + +* Unit test serialize map and array types + + + +* Fix deeper level deserialization of UDTs + + + +* Add a yaml test for issue 4322 + + + +* Add a test case for issue 4340 + + + +* Remove redundant classes + + + +--------- + + +(cherry picked from commit 69a718bae67a859b1e0c43d2d5856fee5d3a9e29) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #4063 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @songkant-aws + + +My change also touches some files introduced by this PR. Will wait for its merge and resolve potential conflicts together. + + +--- + +# PR #4624: [Backport 2.19-dev] Pushdown distinct count approx (#4614) + +**URL:** https://github.com/opensearch-project/sql/pull/4624 + +**Author:** @xinyual + +**Created:** 2025-10-22T02:16:35Z + +**State:** MERGED + +**Merged:** 2025-10-22T21:49:56Z + +**Changes:** +103 -75 (6 files) + + +## Description + +### Description +backport #4614 to 2.19-dev + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4623: Refactor JsonExtractAllFunctionIT and MapConcatFunctionIT + +**URL:** https://github.com/opensearch-project/sql/pull/4623 + +**Author:** @ykmr1224 + +**Created:** 2025-10-22T00:27:07Z + +**State:** MERGED + +**Merged:** 2025-10-22T16:52:42Z + +**Changes:** +2 -190 (4 files) + +**Labels:** `PPL`, `maintenance`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + +**Assignees:** @ykmr1224 + + +## Description + +### Description +- Refactor JsonExtractAllFunctionIT and MapConcatFunctionIT to utilize common base class +- Some more reduction of redundant code +- Pure refactoring and no production logic change. 
+ +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4621: Fix: Support Alias Fields in MIN, MAX, FIRST, LAST, and TAKE Aggregations + +**URL:** https://github.com/opensearch-project/sql/pull/4621 + +**Author:** @ahkcs + +**Created:** 2025-10-21T21:18:28Z + +**State:** MERGED + +**Merged:** 2025-10-24T22:59:02Z + +**Changes:** +357 -146 (38 files) + +**Labels:** `bug`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + MIN, MAX, FIRST, LAST, and TAKE aggregations returned null or empty results when used with alias fields (e.g., @timestamp aliasing created_at) because they used .fetchSource() which cannot access alias fields. 
+ +Changes: +``` + - Aggregation builders: Changed from .fetchSource() to .fetchField() to use OpenSearch's fields API + - Response parsers: Enhanced TopHitsParser and ArgMaxMinParser to extract values from hit.getFields() +``` + +``` + # Before: Returns null + source=index | stats MIN(@timestamp), MAX(@timestamp) + + # After: Returns correct min/max values + {"MIN(@timestamp)": "2024-01-01T10:00:00.000Z", "MAX(@timestamp)": "2024-01-03T10:00:00.000Z"} +``` +### Related Issues +* Resolve #4595 + + + +## Reviews + + +### @Swiddis - DISMISSED + + +overall lgtm + + +## Review Comments + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SearchHitUtils.java:None` + + +suggestion: Return `Optional` for type safety + + +### @dai-chen on `docs/user/ppl/cmd/stats.rst:None` + + +Is this expected? As discussed, shall we keep previous semantic of each function? + + +### @dai-chen on `integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest.json:None` + + +np: the pretty format is lost? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +Is this class for V2 engine and ArgMaxMinParser for V3 Calcite? Are we fixing both? + + +### @ahkcs on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +Yes we are fixing both. +TopHitsParser is used in both V2 and V3 engines +ArgMaxMinParser is V3 Calcite only for ARG_MAX/ARG_MIN functions + + +### @ahkcs on `integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest.json:None` + + +Updated to yaml format + + +### @ahkcs on `docs/user/ppl/cmd/stats.rst:None` + + +Changed to avoid breaking change + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:420` + + +I think we should prefer fetch fields API. This will break for certain field types, e.g., text, nested. Please double check. 
+ + +### @ahkcs on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:420` + + +Updated to use fetch fields API + + +## General Comments + + +### @ahkcs + + +@ritvibhatt Please take a look at the changes in `ArgMaxMinParser` to see if makes sense to you + + +--- + +# PR #4618: Support serializing external OpenSearch UDFs at pushdown time + +**URL:** https://github.com/opensearch-project/sql/pull/4618 + +**Author:** @yuancu + +**Created:** 2025-10-21T10:41:22Z + +**State:** MERGED + +**Merged:** 2025-11-03T08:23:27Z + +**Changes:** +141 -18 (9 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Previously, externally registered OpenSearch UDFs can not be serialized as they are not registered in `RelJsonSerializer`. This PR collects these UDFs in a `SqlOperatorTable` and registers it in `RelJsonSerializer`. + +### Blocker + +Blocked by #2813 (or potentially other issues that restrict groupping by struct fields) + +UDF is serialized, but grouping by a generated struct seems to be problematic after pushdown. + +- `source=weblogs | where host='1.2.3.4' | eval info = geoip('my-datasource', host) | stats count() by info`: + ```json + { + "error": { + "reason": "There was internal problem at backend", + "details": "java.sql.SQLException: exception while executing query: class java.lang.String cannot be cast to class java.util.Map (java.lang.String and java.util.Map are in module java.base of loader 'bootstrap')", + "type": "RuntimeException" + }, + "status": 500 + } + ``` + In this case, the result map is converted to a string when used as a group key. + +
+ Directly running the DSL with the script gives the bucket key as string + + Query: + ```json + {"from":0,"size":0,"timeout":"1m","query":{"term":{"host":{"value":"1.2.3.4","boost":1.0}}},"_source":{"includes":["host"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"info":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAknsKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfSVAiLAogICAgICAidHlwZSI6ICJPVEhFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImhvc3QiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQCw3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJHRU9JUCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAibGl0ZXJhbCI6ICJteS1kYXRhc291cmNlIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiTUFQIiwKICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgImtleSI6IHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAicHJlY2lzaW9uIjogLTEKICAgIH0sCiAgICAidmFsdWUiOiB7CiAgICAgICJ0eXBlIjogIkFOWSIsCiAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJzY2FsZSI6IC0yMTQ3NDgzNjQ4CiAgICB9CiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAEaG9zdH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAAklQeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1761646601217183000}},"missing_bucket":true,"missi
ng_order":"first","order":"asc"}}}]}}}} + ``` + + Result: + ```json + { + "took": 137, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "composite_buckets": { + "after_key": { + "info": "{continent_name=Oceania, country_iso_code=AU, country_name=Australia, location=-33.4940,143.2104, time_zone=Australia/Sydney}" + }, + "buckets": [ + { + "key": { + "info": "{continent_name=Oceania, country_iso_code=AU, country_name=Australia, location=-33.4940,143.2104, time_zone=Australia/Sydney}" + }, + "doc_count": 1 + } + ] + } + } + } + ``` + +
+ +
+ For reference, the result of `source=weblogs | where host='1.2.3.4' | eval info = geoip('my-datasource', host)` is: + +```json +{ + "schema": [ + { + "name": "host", + "type": "ip" + }, + { + "name": "method", + "type": "string" + }, + { + "name": "bytes", + "type": "string" + }, + { + "name": "response", + "type": "string" + }, + { + "name": "url", + "type": "string" + }, + { + "name": "info", + "type": "struct" + } + ], + "datarows": [ + [ + "1.2.3.4", + "GET", + "1234", + "200", + "/history/voyager1/", + { + "continent_name": "Oceania", + "country_iso_code": "AU", + "country_name": "Australia", + "location": "-33.4940,143.2104", + "time_zone": "Australia/Sydney" + } + ] + ], + "total": 1, + "size": 1 +} +``` +
+ +### Workaround + +Instead of directly groupping by the struct generated by `geoip`, I created tests that group results by sub-fields of result struct of `geoip` function. E.g. if the result object `info` `{"str": "a string", "num": 1}`, I group results by `info.str`. + +### Related Issues +Resolves #4478 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4617: [Backport 2.19-dev] Fix filter parsing failure on date fields with non-default format + +**URL:** https://github.com/opensearch-project/sql/pull/4617 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-21T10:28:45Z + +**State:** MERGED + +**Merged:** 2025-10-22T09:38:18Z + +**Changes:** +199 -34 (10 files) + + +## Description + +Backport 60b7d98422edb0df46103fc96f8da443048281be from #4616. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4616: Fix filter parsing failure on date fields with non-default format + +**URL:** https://github.com/opensearch-project/sql/pull/4616 + +**Author:** @qianheng-aws + +**Created:** 2025-10-21T07:21:07Z + +**State:** MERGED + +**Merged:** 2025-10-21T10:28:22Z + +**Changes:** +199 -32 (10 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Forcing adding `'format': 'date_time'` for RangeQuery on date time related fields, so we can decouple the format of literal value from that format in our mapping. And we also need to change TermQuery to RangeQuery for `=` or `!=` operator since only RangeQuery support the `format` parameter. + +The above solution is what `Calcite` applies for ES, but as it use `GregorianCalendar` for date time literal while we use `String`, we skips the logic of adding format. + +### Related Issues +Resolves #4490 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4615: Optimize pushdown script size with necessary fields per expression + +**URL:** https://github.com/opensearch-project/sql/pull/4615 + +**Author:** @songkant-aws + +**Created:** 2025-10-21T06:43:11Z + +**State:** MERGED + +**Merged:** 2025-10-21T09:24:57Z + +**Changes:** +480 -139 (47 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This optimization aims to resolve #4547, #4597. With necessary fields to be serialized per expression, script size will be minimized by rewriting expression input indices and pruning field types. It will mitigate the issue of exceeding script size limit for queries over wide schema indices. + +### Related Issues +Resolves #4547 , resolves #4597 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4614: Pushdown distinct count approx + +**URL:** https://github.com/opensearch-project/sql/pull/4614 + +**Author:** @xinyual + +**Created:** 2025-10-21T06:35:39Z + +**State:** MERGED + +**Merged:** 2025-10-21T10:16:40Z + +**Changes:** +32 -6 (6 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +This PR enable pushdown for distinct count approx. 
Since the cardinality aggregation already uses the [HyperLogLog++ algorithm](https://docs.opensearch.org/latest/aggregations/metric/cardinality/), as we expect, we push it down directly as a cardinality aggregation.
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GeoIpFunction.java:41` + + +should be following ? Seems not a breaking change from the `ip.rst`, but why the description here is different. +``` + *.
  • (STRING, STRING) -> MAP + *
  • (STRING, STRING, STRING) -> MAP + *
  • (STRING, IP) -> MAP + *
  • (STRING, IP, STRING) -> MAP +``` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GeoIpFunction.java:41` + + +Now I modified the underlying function to accept IP only. + +It still accepts strings as inputs with implicit type coercion. + +If it gets string as input, it will try to cast it to IP first. This guarantees that the input is a valid IP address. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4611: Add permissive mode setting + +**URL:** https://github.com/opensearch-project/sql/pull/4611 + +**Author:** @ykmr1224 + +**Created:** 2025-10-21T02:52:21Z + +**State:** MERGED + +**Merged:** 2025-10-27T22:48:48Z + +**Changes:** +404 -43 (15 files) + +**Labels:** `PPL`, `maintenance`, `calcite` + +**Assignees:** @ykmr1224 + + +## Description + +### Description +- Add permissive mode setting, and enable **only in standalone integ-test** + - When permissive mode is enabled, the fields not found in the index mapping will be collected and stored in `_MAP` field typed as `MAP` (dynamic fields). +- This PR adds only most primitive support to handle dynamic fields. + - I will raise separate PRs to provide wider support. + +### Related Issues +Permissive mode RFC: https://github.com/opensearch-project/sql/issues/4349 +Dynamic fields RFC: https://github.com/opensearch-project/sql/issues/4433 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +After introducing map functions, we're beginning to touch the "core" logic. Shall we consider using feature branch since there is no `.x` branch today? 
+ + +## Review Comments + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:184` + + +Does https://github.com/opensearch-project/sql/issues/3459 solved? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsCommandIT.java:59` + + +What is explain results? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/storage/StorageEngine.java:20` + + +getPermissiveAwareTable is temporay change? if yes, add TODO. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:124` + + +only the top level are extracted? what if field name is field.f1.f2? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/storage/StorageEngine.java:20` + + +This will be kept to avoid permissive is applied to SQL. +(Only Calcite PPL implementation will use this method to allow permissive mode) + + +### @ykmr1224 on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:124` + + +Hmm, do we want to flatten nested data? + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsCommandIT.java:59` + + +Add explain validation in the tests. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:184` + + +#3459 is not solved yet. I think #3459 is not directly related to this type mapping. + +This change is needed to set `_MAP` as Map. +It seems that currently object is converted to array of map when read like: `[{"attr1": 1, "attr2": "v"}], "otherValue"`. + + + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/PermissiveOpenSearchIndex.java:25` + + +Is this line the only reason to add this new class or more future changes? Because I see `OpenSearchIndex` already has `Settings`. 
+ + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLPermissiveIntegTestCase.java:17` + + +Instead of a base class, just wondering should we have a test suite similar as `CalciteNoPushdownIT` to run all IT w/wo permissive mode? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/plan/DynamicFieldsConstants.java:12` + + +np: any more constants later? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsCommandIT.java:20` + + +Could you add test/assertion for SparkSQL now or later? Just want to make sure this feature is portable to other engine. + + +### @Swiddis on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:38` + + +thought: I'd like if this could be configured per-query instead of cluster-global + +If 99.9% of my queries are static, I don't want to slow all of them down for one dynamic query + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:304` + + +Is this change already covered by test? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:184` + + +Previously it's `Map`? No test change needed for this? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:407` + + +Is this change related to some other issue or required by dynamic field? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:407` + + +This is needed for dynamic fields. Dynamic fields require alias to expose the item name as field name into result record schema. + + +### @ykmr1224 on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:38` + + +Can you give your inputs here? https://github.com/opensearch-project/sql/issues/4349 +I think we can introduce per query or per index configuration as needed later. 
+ + +### @ykmr1224 on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/PermissiveOpenSearchIndex.java:25` + + +I made this class to avoid the setting is applied to SQL query as well, since OpenSearchIndex is shared. +Once we enable permissive mode to SQL (and deprecate legacy engines), we can unify it. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLPermissiveIntegTestCase.java:17` + + +Yes, we will definitely need them, but for now many tests will fail since this change cover very limited case. (Once we establish good foundation, I will introduce such IT) + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/plan/DynamicFieldsConstants.java:12` + + +I don't expect other constant. (I didn't find good place to put this, and made this class) + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java:304` + + +Yes, it is covered in the test when dynamic field is selected in the result. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalciteDynamicFieldsCommandIT.java:20` + + +Sure, let me add in the following PRs. + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/PermissiveOpenSearchIndex.java:25` + + +I see. Our SQL and PPL shared both the index class and cluster setting. + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLPermissiveIntegTestCase.java:17` + + +Np. But I'm thinking only add IT that can pass to suite classes list. + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:124` + + +We may add a IT to clarify this either a limitation or by design? + + +## General Comments + + +### @ykmr1224 + + +> After introducing map functions, we're beginning to touch the "core" logic. Shall we consider using feature branch since there is no `.x` branch today? 
+ +Very good point. I was on the fence as it could lead to huge conflict when merging. +But made feature branch and switched the merge destination. + + +--- + +# PR #4610: [Backport 2.19-dev] Support refering to implicit `@timestamp` field in span + +**URL:** https://github.com/opensearch-project/sql/pull/4610 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-21T02:50:26Z + +**State:** MERGED + +**Merged:** 2025-10-21T09:20:29Z + +**Changes:** +385 -41 (9 files) + + +## Description + +Backport c30d5d01c36b7c959d7b2a290c319c4a8f0e8367 from #4138. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4609: [Backport 2.19-dev] Support format=yaml in Explain API (#4446) + +**URL:** https://github.com/opensearch-project/sql/pull/4609 + +**Author:** @penghuo + +**Created:** 2025-10-20T23:31:32Z + +**State:** MERGED + +**Merged:** 2025-10-22T05:41:53Z + +**Changes:** +461 -242 (17 files) + +**Labels:** `PPL` + +**Assignees:** @penghuo + + +## Description + +### Description +https://github.com/opensearch-project/sql/pull/4446 +https://github.com/opensearch-project/sql/pull/4608 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4608: Fix compile issue in main branch + +**URL:** https://github.com/opensearch-project/sql/pull/4608 + +**Author:** @penghuo + +**Created:** 2025-10-20T22:35:15Z + +**State:** MERGED + +**Merged:** 2025-10-20T23:17:17Z + +**Changes:** +34 -34 (2 files) + +**Labels:** `PPL`, `maintenance`, `bugFix` + +**Assignees:** @penghuo + + +## Description + +### Description +Fix compile issue introduce by https://github.com/opensearch-project/sql/commit/d6a9719f3c8d73f1b274842c5b83e7df7df5d9ca + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +Manual backport https://github.com/opensearch-project/sql/pull/4609 + + +--- + +# PR #4606: Fix bin nested fields issue + +**URL:** https://github.com/opensearch-project/sql/pull/4606 + +**Author:** @ahkcs + +**Created:** 2025-10-20T20:59:44Z + +**State:** MERGED + +**Merged:** 2025-10-24T21:32:05Z + +**Changes:** +94 -1 (2 files) + +**Labels:** `bug`, `PPL`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +Fixed bin command failing on nested fields (e.g., resource.attributes.telemetry.sdk.version). +Updated `CalciteRelNodeVisitor.projectPlusOverriding()` to use prefix matching instead of exact matching for nested field names. 
+### Related Issues +* Resolve #4482 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Let's extract for readability / maintainability. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Does this mean it could override all the original fields start with `originalName + "."`? (like when original fields include `resource.a`, `resource.b`, etc. then all of them will be removed? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Thanks for the question! No - originalFieldNames only contains top-level field names like "resource", not nested paths like "resource.a" or "resource.b". + +So when binning resource.c.nested, the filter only matches the top-level "resource" field (via startsWith("resource.")). This causes the entire resource struct to be removed and flattened, with the binned field replacing the original nested value. + +Only one top-level struct gets matched and processed, regardless of how many nested fields it contains. + +You can refer to this IT test I added: `testBinWithNestedFieldWithoutExplicitProjection` + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Extracted + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Field name could contain `.` and `newName.startsWith("resource.")` could match with multiple fields in my understanding. 
+I am unsure how much we currently support field name containing `.`, but [`QualifiedNameResolver`](https://github.com/opensearch-project/sql/blob/c30d5d01c36b7c959d7b2a290c319c4a8f0e8367/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java#L105-L114) implements longest match logic to decide the referred field in case qualified name contains multiple dots. + + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +I think it would be an unusual case for field name to contain mutliple `.`? +Looking at the end-to-end flow, originalFieldNames comes from: +context.relBuilder.peek().getRowType().getFieldNames() + +This returns only the direct fields of the current RelNode's row type. For a schema with nested structures, the row type would have: + - Top-level field: "resource" (type: STRUCT) + - Top-level field: "severityNumber" (type: INT) + +It would not contain "resource.a", "resource.b", etc. as separate top-level fields - those only exist as nested fields within the resource struct definition. + +The scenario where multiple top-level fields share a prefix (like resource, resource.a, resource.b all being top-level) would require an unusual schema design(I think we can rarely meet this?) + + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:881` + + +is there any other place using this pattern of detection? Or if there is a better way to detect if it is a nested field or not? 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:881` + + +In CalciteRelNodeVisitor.java, the visitFlatten() method uses the same startsWith() pattern: +``` + public RelNode visitFlatten(Flatten node, CalcitePlanContext context) { + visitChildren(node, context); + RelBuilder relBuilder = context.relBuilder; + String fieldName = node.getField().getField().toString(); + // Match the sub-field names with "field.*" + List fieldsToExpand = + relBuilder.peek().getRowType().getFieldList().stream() + .filter(f -> f.getName().startsWith(fieldName + ".")) // ← Same pattern! + .toList(); + // ... rest of method + } +``` + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +OpenSearch mapping key could include `.`, and also we can easily introduce field which contains `.` with `eval` command. +Why can't we directly match with the actual field we used? +`projectPlusOverriding` is used for multiple commands, and could have side effect to other commands. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +>Why can't we directly match with the actual field we used? + +We DO directly match. The logic uses both: + 1. Exact match: newName.equals(originalName) + 2. Struct match: newName.startsWith(originalName + ".") + + This exact logic is already used by eval. Example: + source=telemetry | eval `resource.temp` = 1 | bin `resource.attributes.telemetry.sdk.version` span=2 + + Step 1 - Eval: + - "resource.temp".startsWith("resource.") → true → flattens struct + - Schema becomes: ["resource.attributes.telemetry.sdk.enabled", "resource.attributes.telemetry.sdk.language", ..., "resource.temp"] + + Step 2 - Bin: + - Schema already flattened, exact match works: "resource.attributes.telemetry.sdk.version".equals("resource.attributes.telemetry.sdk.version") → true + +Both commands use the same logic safely. 
Added test (testBinWithEvalCreatedDottedFieldName) to confirm no edge case issues + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +I think @ykmr1224 means if we can directly know the sub field of a struct and project it, instead of relying on the crispy name matching. E.g. if a struct is `info {a: string, b: int}`, we can directly project `info, info.a, info.b` instead of relying on pattern `info.*` to match the names, because there may be another schema `info: string, info.a integer`, where `info.a` is not a sub-field of `info`. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +@yuancu Thanks for the clarification! However, I think the current logic handles this situation, you can refer to this newly added integration test using eval command to test: `testBinWithEvalCreatedDottedFieldName` + +For the edge case you mentioned, I think it would be problematic if we have two `info.a` with different type because that would cause confusion for the commands. For example, when we are using stats count(info.a), which `info.a` is it referring to? For other cases where there's no naming overlap, I think you can refer to the eval test case + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4605: issue #4514 tonumber function as part of roadmap #4287 + +**URL:** https://github.com/opensearch-project/sql/pull/4605 + +**Author:** @asifabashar + +**Created:** 2025-10-20T19:23:31Z + +**State:** MERGED + +**Merged:** 2025-12-11T21:28:43Z + +**Changes:** +686 -2 (11 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description +tonumber(, ) +This function converts a string to a number. +Usage +You can use this function with the eval and where commands, in the WHERE clause of the from command, and as part of evaluation expressions with other commands. 
+The argument can be a string or the name of a field that contains a string. If the string contains a decimal point ( . ), then the tonumber function converts it to a double. Otherwise, the function converts the string to an integer. + +Be aware that integers are supported differently in different product contexts: +The argument is optional. It defines the base of the number in the argument. It defaults to 10, which corresponds to the decimal system. You can set to a number between 2 and 36, inclusive. + +If the tonumber function cannot parse a literal string to a number, the function returns an error. + +Basic examples +The following example converts the string values from the store_sales field to numbers, and then stores the numbers in a field named n. This example uses the default base of 10. +... | eval n=tonumber(store_sales) + +The following example takes the hexadecimal number and uses a base of 16 to return the number "164". + +... | eval n=tonumber("0A4",16) +Resolves #4514 +### Related Issues +Resolves #4514 + + + +### Check List +- [ x] New functionality includes testing. +- [x ] New functionality has been documented. +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/functions/conversion.rst:None` + + +Add newline between description and example. https://github.com/asifabashar/sql/blob/1ff7d4601c71d339e99243e4cbf6c7bad146932b/docs/user/ppl/functions/conversion.rst#tonumber + + +### @penghuo on `docs/user/ppl/functions/conversion.rst:None` + + +@ritvibhatt please help review doc format. 
+ + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +define as eval function, take other function impl as example. + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:459` + + +reuse visitEvalFunctionCall which define generic way to access arguments. + +take a look how replace function works, it is similar to tonumber use case. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +revert this change. + + +### @ritvibhatt on `docs/user/ppl/functions/conversion.rst:None` + + +Can get rid of ```with format type``` and extra space between ```argument``` and ```to``` + +```suggestion +Usage: tonumber(string, [base]) converts the value in first argument to provided base type string in second argument. If second argument is not provided, then it converts to base 10 number representation. +``` + + +### @ritvibhatt on `docs/user/ppl/functions/conversion.rst:None` + + +Add new line between usage and return type + + +### @asifabashar on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +defined as eval function in latest commit + + +### @asifabashar on `docs/user/ppl/functions/conversion.rst:None` + + +added new line + + +### @asifabashar on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +defined as eval function in latest commit + + +### @asifabashar on `docs/user/ppl/functions/conversion.rst:None` + + +added newline + + +### @asifabashar on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +reverted + + +### @penghuo on `docs/user/ppl/functions/conversion.rst:None` + + +using source=people, align with other test in same doc. + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +Add similar test to verify logical plan and Sql query. +e.g. 
https://github.com/opensearch-project/sql/pull/4497/files#diff-d8afd9bd8dc0f2e5ab069d8b37842fb0b41f6e582bd9dc63c248b2d81dd67674 + + +### @penghuo on `core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java:None` + + +Any standard we are following? +Seems SPL does not support tonumber("AB.B", 16) + +If not, we should define the behaviour of tonumber as +1. if base=10, convert to int / double, depend on whether there is `.` +2. if base!=10, always convert to long, e.g. `new BigInteger("FF", 16).` + + +### @asifabashar on `docs/user/ppl/functions/conversion.rst:None` + + +changed to source=people + + +### @asifabashar on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +added similar tests + + +### @asifabashar on `core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java:None` + + +made the changes + + +### @penghuo on `core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java:None` + + +@asifabashar what is change? +I still see test +``` + @Test + void testToNumberWithDecimalBase16() { + assertEquals(255L, ToNumberFunction.toNumber("FF.8", 16)); + assertEquals(16L, ToNumberFunction.toNumber("10.4", 16)); + assertEquals(171L, ToNumberFunction.toNumber("AB.B", 16)); + } +``` + + +### @asifabashar on `core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java:None` + + +sorry, fixed it now + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java:None` + + +simpliy code logic, [not just remove UT](https://github.com/opensearch-project/sql/pull/4605/commits/b5346a6dcfb385f040cc103f8ad480678dc0156a). + +* if base=10, convert to int / double, depend on whether numStr contain `.` +* if base!=10, convert to long, e.g. 
new BigInteger("FF", base) +* return null if numStr is malformed + + +### @asifabashar on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java:None` + + +applied the changes + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java:None` + + +What are the lower and upper bounds that tonumber supports? Could you add a UT and update the doc? e.g. for base=16, [-7FFFFFFFFFFFFFFF, 7FFFFFFFFFFFFFFF]? + + + +### @asifabashar on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java:None` + + +added unittest and documentation. + + +### @dai-chen on `docs/user/ppl/functions/conversion.rst:None` + + +Is `int_value` an integer or double? + + +### @dai-chen on `docs/user/ppl/functions/conversion.rst:None` + + +It's not converting 1st arg to provided base type in 2nd arg, right? The 2nd arg describes the base of 1st arg instead? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java:None` + + +useless? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java:None` + + +Is this by design? + + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java:None` + + +No IT? I think we need to also cover NULL and error case for invalid input. + + +### @asifabashar on `docs/user/ppl/functions/conversion.rst:None` + + +Applied fixes so it does not show decimal .0 at the end for integer + + +## General Comments + + +### @asifabashar + + +@penghuo please review + + +### @asifabashar + + +@penghuo please review + + +### @asifabashar + + +@penghuo Please review. + + +### @penghuo + + +Please fix IT. + + +### @asifabashar + + +Please help set PR label. + + +### @penghuo + + +Looks good to me, please resolve the merge conflict. + +@dai-chen , please take another look. 
+ + +### @asifabashar + + +@dai-chen please review + + +### @asifabashar + + +@dai-chen @penghuo Please check if anything else is needed and what is the next step. + + +### @dai-chen + + +> @dai-chen @penghuo Please check if anything else is needed and what is the next step. + +Hi @asifabashar , I added comment here: https://github.com/opensearch-project/sql/pull/4605#discussion_r2600254535 + + +### @asifabashar + + +@dai-chen please review. I have made return type double. Current test failures are unrelated . + + +### @asifabashar + + +@dai-chen please review. I have made return type double. Current test failures are unrelated and resolved conflicts + + +### @asifabashar + + +@dai-chen @penghuo I have fixed DCO. + + +### @asifabashar + + +@penghuo we need 2 reviewers , please help review. + + +--- + +# PR #4604: [Backport 2.19-dev] Add replace command with Calcite (#4451) + +**URL:** https://github.com/opensearch-project/sql/pull/4604 + +**Author:** @ahkcs + +**Created:** 2025-10-20T17:25:53Z + +**State:** MERGED + +**Merged:** 2025-10-22T18:15:41Z + +**Changes:** +1082 -9 (25 files) + + +## Description + +* Add replace command with Calcite + +Also fixed discrepancy of escape handling between main and 2.19-dev + + ## Changes + + 1. **AstBuilder.java**: Fixed compilation error (`.toList()` → `.collect(Collectors.toList())`) + 2. **StringUtils.java**: Restored escape handling (`\`, `\'`, `\"`) that was removed in 2.x Spotless backport (c6b37d0d5) + 3. 
**expressions.rst**: Updated test expectations to match escape handling behavior + + +(cherry picked from commit 5677765e6d2f0203fc99014e5ebc4aa27424b57d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4603: Pushdown sort aggregate metrics + +**URL:** https://github.com/opensearch-project/sql/pull/4603 + +**Author:** @LantaoJin + +**Created:** 2025-10-20T09:16:34Z + +**State:** MERGED + +**Merged:** 2025-10-24T09:03:42Z + +**Changes:** +1156 -500 (41 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Pushdown sort aggregate metrics: +- Add `SORT_AGG_METRICS` after `AGGREGATION` in pushdown context +- Convert `composite` bucket agg to `terms/histo/date-histo` agg if sort one metric on `single` bucket +- Convert `composite` bucket agg (all terms source) to `multi-terms` agg if sort one metric on `multiple` buckets + +### Related Issues +Resolves #4282 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/SortAggregationMetricsRule.java:None` + + +nit: can be moved to `PlanUtils`, then used in rule config as a predicate + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AbstractAction.java:23` + + +javadoc may help future developers using this interface + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:97` + + +[non-blocking] I'm thinking is there a way to construct the aggregation upfront instead of decomposing a composite aggregation later. E.g. create term aggregation separately in aggregate analyzer, then only pushdown sort as bucket order here. This may simplify the implementation. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/SortAggregationMetricsRule.java:None` + + +IMO, not worth to move this to util class. This could be changed in following up PRs. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AbstractAction.java:23` + + +added + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:97` + + +composite aggregation is better than terms aggregation on performance even for single bucket + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/SortAggregationMetricsRule.java:None` + + +Could change to be an anonymous function in predication if not common used. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:None` + + +Is `path` refer to the field name of agg call here? 
Anyway, composite's sub agg could be other bucket builder than only metric builder here after we introduce auto_span or range bucket. It seems incorrect here in that case. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:287` + + +We can only do this operation when bucket_nullable=false, right? Any place to check this parameter? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:None` + + +do you mean composite + auto_span? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:287` + + +[AggPushDownAction.L97](https://github.com/opensearch-project/sql/pull/4603/files#diff-f1d75dafda6749c7779bd32c977296f79215fcd034e06d9d1e964461383dbed3R97) and L126 + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:None` + + +addressed, add more explain tests + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:287` + + +[nit] Will it be better to put these checks in one place if possible? Now the prerequisites are split both here and in `aggAction.pushDownSortAggMetrics`, although they are both `aggregationBuilders` and some checking seems overlapped. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:174` + + +Shouldn't throw exception here for now? Otherwise the sort is pushed down without any transformation. + +Could you please add a case for multi-terms? The plan should get unchanged since we don't implement muti-term agg now until the following-up PR. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:174` + + +Will fix the part in a followup PR soon when this PR merged. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:174` + + +Or I will submit the multi-terms in the same PR later today, what do you think? @qianheng-aws . just don't want make a single PR too large. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:287` + + +Cannot get the bucket_nullable information here. Aggregation has already pushed down. So [AggPushDownAction.L97](https://github.com/opensearch-project/sql/pull/4603/files#diff-f1d75dafda6749c7779bd32c977296f79215fcd034e06d9d1e964461383dbed3R97) and L126 would be better IMO. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:174` + + +Submitted a new commit and updated the PR description. Now both single bucket or multiple buckets are supported. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/SortAggregationMetricsRule.java:None` + + +fixed. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:287` + + +But I see you've already get the pushed aggregation builder here. +``` + List aggregationBuilders = + pushDownContext.getAggPushDownAction().getAggregationBuilder().getLeft(); +``` + +Or otherwise L290-310 could all be moved into `aggAction.pushDownSortAggMetrics`. + +It's also fine to me if not making change since the logic is correct anyway. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:287` + + +I see, but I don't want always to check CompositeValuesSourceBuilder.missingBucket() here since not all ValuesSourceBuilders are impacted by `bucket_nullable`, for example date histogram. So moving to here has to check types, sounds not worth to check types twice. 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:192` + + +[nit, non-blocking] Could this be combined with above branch? i.e. Does `path = fieldNames.get(collations.get(0).getFieldIndex());` also works for `metric instanceof ValuesSourceAggregationBuilder.LeafOnly`? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:187` + + +Do we only support only 1 agg metric for this enhancement? And anywhere to check this? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:187` + + +> Do we only support only 1 agg metric for this enhancement? And anywhere to check this? + +https://github.com/opensearch-project/sql/pull/4603/files#diff-b4a250f8c0175f824789fb6cd403e69f4de986c912461daf766d39442a7f530bR40 + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java:187` + + +good catching. added restricted checking. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4602: Support `appendpipe`command in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4602 + +**Author:** @xinyual + +**Created:** 2025-10-20T07:50:38Z + +**State:** MERGED + +**Merged:** 2025-11-12T06:12:49Z + +**Changes:** +408 -1 (17 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Add appendpipe command. +`Appendpipe` appends the result of the subpipeline to the search results. Unlike a subsearch, the subpipeline is not run first. The subpipeline is run when the search reaches the appendpipe command. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - COMMENTED + + +Review in progress + + +### @yuancu - DISMISSED + + +LGTM + + +## Review Comments + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:254` + + +Can you add a unit test in AstBuilderTest? + + +### @xinyual on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:254` + + +Already add one. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java:73` + + +I found a case that's slightly different from expectation: `source=opensearch-sql_test_index_account | stats count() by state, span(age, 10) as span | appendpipe [ stats count() by span | eval state='nomadland'] | sort span"` + +The result is not merged although both `state` in the main- and sub-query are of string types: +```json +{ + "schema": [ + { + "name": "count()", + "type": "bigint" + }, + { + "name": "span", + "type": "bigint" + }, + { + "name": "state", + "type": "string" + }, + { + "name": "state0", + "type": "string" + } + ] +} +``` + + +This may originates from the fact that the `state` in the main query is nullable, while the type of `state` in the subquery is not. IMO, they can be merged, but I'm also not quite sure whether this should be a concern. + + +### @xinyual on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java:73` + + +I think it's because I don't merge the latest code. This problem is already fixed by #4512 + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:261` + + +Will there be more than 1 children in the subquery? Maybe adding assertion and throw exception to avoid this case if we don't support. 
+ + +## General Comments + + +### @xinyual + + +@Swiddis could you please approve again? + + +--- + +# PR #4601: [Backport 2.19-dev] Fix push down failure for min/max on derived field (#4572) + +**URL:** https://github.com/opensearch-project/sql/pull/4601 + +**Author:** @qianheng-aws + +**Created:** 2025-10-20T03:57:26Z + +**State:** MERGED + +**Merged:** 2025-10-20T08:11:38Z + +**Changes:** +26 -6 (3 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4572 commit https://github.com/opensearch-project/sql/commit/0257aa5d51f044f3287bbaffc83aa72f8960c57c) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4600: [Backport 2.19-dev] Add value type hint for derived aggregate group by field (#4583) + +**URL:** https://github.com/opensearch-project/sql/pull/4600 + +**Author:** @qianheng-aws + +**Created:** 2025-10-20T03:50:06Z + +**State:** MERGED + +**Merged:** 2025-10-20T08:12:15Z + +**Changes:** +147 -60 (12 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4583 commit https://github.com/opensearch-project/sql/commit/0499e95203f91f4570c6f791c7395f02ca1b5cf2) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4599: Support Automatic Type Conversion for REX/SPATH/PARSE Command Extractions + +**URL:** https://github.com/opensearch-project/sql/pull/4599 + +**Author:** @penghuo + +**Created:** 2025-10-18T05:09:09Z + +**State:** MERGED + +**Merged:** 2025-10-23T15:41:38Z + +**Changes:** +630 -96 (11 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +* Expand the 
Calcite coercion logic, so functions and aggregation operators automatically cast compatible strings to numeric or boolean overloads, and return the coerced Rex nodes to the execution pipeline. +* Test cover in [4356.yaml](https://github.com/penghuo/os-sql/blob/1066422cd23a2ba05ef74b5900a226b096a63dde/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4356.yml). +* [Document implicit conversion rules for PPL](https://github.com/opensearch-project/sql/pull/4599/files#diff-2cd6bff3d6c2cac7daebd7d067ebba2c545feb4c8332201f6bf78cede14b0b52) +* To reviewer + * Add TypeChecker for SUBTRACT, CEIL/CEILING, FLOOR, and ROUND. It is a workaround to support sequence CompositeOperandTypeChecker. + * To mitigate the user issue, the current CoercionUtils change is a hack; refactor the code in following PRs to leverage TypeCoercionImpl + +### Related Issues +https://github.com/opensearch-project/sql/issues/4634 +Resolves #4356 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:225` + + +Does this mean that the only viable cast between string and numbers is string -> double? Are all the rest like number -> string, string -> integer not possible? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:None` + + +I'm wondering why there is special handling here for string and number types. If I got it correctly, the computed targetType should already be a double if the argument is a string and there exists a signature that accepts double. 
+ +I'm worried that it will cast string to double for cases where the function actually only accepts an integer: e.g. will the `"1"` in `week('2020-12-11', '1')` be cast to double 1? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:244` + + +Do we rewrite `WideningTypeRule.max` and `WideningTypeRule.distance` to avoid introducing breaking changes to v2? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:244` + + +Yes. +The straightforward solution is to add double as parent of string, but it would break existing V2 behavior. +I plan to decouple coercion logic from V2 type system. What do you think? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:225` + + +yes, only string -> double (to the widest numeric type). The reason is to handle most use cases, e.g. "1.1" +number -> string, maybe. not the [major use case](https://github.com/opensearch-project/sql/issues/4356) to solve. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:None` + + +currently, the distance function measures the direct relation of two data types. for example, [string, int], will return IMPOSSIBLE, but we expected it to return [safe_cast(string as double), cast(int as double)]. + +Agree, code looks tricky, I am working on a better way to handle this. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:216` + + +Copy distance max function WidenRule to avoid impacting v2 behavior. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:40` + + +As mentioned in the original PR of type coercion, https://github.com/opensearch-project/sql/pull/3914#discussion_r2244919778, we may have a chance to do type coercion based on Calcite's type system like `RelDataType` and SqlTypeName. 
Then we should be able to avoid reusing v2's coercion logic. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:None` + + +Shouldn't `findWidestType(arguments)` return `double` as the `targetType` for [string, int]? I see `COMMON_COERCION_RULES` has contained such a rule. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Shouldn't this be `nodes.subList(1, nodes.size())`? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:40` + + ++1. the next step is refactor TypeCoercion logic, and try to leverage calcite TypeCoercionImpl interface. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Thanks! +https://github.com/opensearch-project/sql/pull/4599/commits/cde2238291a01016c22dcc4cb4573ba8177ce755 + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:448` + + +To reviewer: validateFunctionArgs return coercionNode if possible, if not, throw ExpressionEvaluationException (current behavior). + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:None` + + +fixed. reuse resolveCommonType. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:244` + + +Let's keep it this way then. As mentioned in [another comment](https://github.com/opensearch-project/sql/pull/4599#discussion_r2452503624), we can refactor later when we adopt `RelDataType` for type coercion. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:473` + + +Why is the cast necessary when there are string in fields? + +Update: I get it. It is to apply string->number coercions if necessary. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4598: Onboarding async query core and grammar files to maven snapshots + +**URL:** https://github.com/opensearch-project/sql/pull/4598 + +**Author:** @peterzhuamazon + +**Created:** 2025-10-18T04:51:48Z + +**State:** MERGED + +**Merged:** 2025-10-20T16:58:46Z + +**Changes:** +31 -14 (3 files) + +**Labels:** `enhancement`, `release` + +**Assignees:** @peterzhuamazon + + +## Description + +# ~~Pending build repo changes on publish-snapshot.sh to support s3 publishing.~~ +* https://github.com/opensearch-project/opensearch-build/pull/5789 + +### Description +Onboarding async query core and grammar files to maven snapshots + +### Related Issues +https://github.com/opensearch-project/opensearch-build/issues/5360 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @peterzhuamazon + + +* Supporting build PR: https://github.com/opensearch-project/opensearch-build/pull/5789 + + +### @peterzhuamazon + + +Hi @penghuo @RyanL1997 +Would you please take a look? + +Publish examples: +* grammar: https://github.com/opensearch-project/sql/actions/runs/18619934328/job/53089722015 +* async: https://github.com/opensearch-project/sql/actions/runs/18619934334/job/53089867044 + +Thanks! 
+ + +--- + +# PR #4596: [AUTO] Increment version to 3.3.1-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/4596 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-17T20:25:26Z + +**State:** MERGED + +**Merged:** 2025-10-20T23:22:50Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `v3.3.1` + + +## Description + +- Incremented version to **3.3.1-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4593: [Backport 2.19-dev] Make composite bucket size configurable (#4544) + +**URL:** https://github.com/opensearch-project/sql/pull/4593 + +**Author:** @LantaoJin + +**Created:** 2025-10-17T02:55:22Z + +**State:** MERGED + +**Merged:** 2025-10-20T03:54:49Z + +**Changes:** +147 -18 (15 files) + + +## Description + +(cherry picked from #4544 commit 31f81b11148b4724a30d05dac72b03ada1502ea3) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4592: [Backport 2.19-dev] Support Regex for replace eval function (#4456) + +**URL:** https://github.com/opensearch-project/sql/pull/4592 + +**Author:** @ahkcs + +**Created:** 2025-10-16T21:48:18Z + +**State:** MERGED + +**Merged:** 2025-10-17T00:06:39Z + +**Changes:** +256 -3 (4 files) + + +## Description + +(cherry picked from commit a8f08ad3e101e05aefecca4765d484d1992a12b3) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4589: [Backport 2.19] Onboarding new maven snapshots publishing to s3 (sql) + +**URL:** https://github.com/opensearch-project/sql/pull/4589 + +**Author:** @peterzhuamazon + +**Created:** 2025-10-16T18:38:50Z + 
+**State:** MERGED + +**Merged:** 2025-10-28T16:28:56Z + +**Changes:** +21 -15 (6 files) + +**Labels:** `enhancement`, `release` + +**Assignees:** @peterzhuamazon + + +## Description + +### Description +[Backport 2.19] Onboarding new maven snapshots publishing to s3 (sql) + +### Related Issues +https://github.com/opensearch-project/opensearch-build/issues/5360 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +CI failure: + +``` +Execution failed for task ':opensearch:compileJava'. +For more on this, please refer to https://docs.gradle.org/8.4/userguide/command_line_interface.html#sec:command_line_warnings in the Gradle documentation. +> Could not resolve all files for configuration ':opensearch:compileClasspath'. +98 actionable tasks: 98 executed + > Could not resolve org.opensearch:opensearch-ml-client:2.19.4.0-SNAPSHOT. + Required by: + project :opensearch + > Could not resolve org.opensearch:opensearch-ml-client:2.19.4.0-SNAPSHOT. + > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/maven/org/opensearch/opensearch-ml-client/2.19.4.0-SNAPSHOT/maven-metadata.xml. + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/maven/org/opensearch/opensearch-ml-client/2.19.4.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden + > Could not resolve org.opensearch:opensearch-ml-client:2.19.4.0-SNAPSHOT. 
+ > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/lucene/org/opensearch/opensearch-ml-client/2.19.4.0-SNAPSHOT/maven-metadata.xml. + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/lucene/org/opensearch/opensearch-ml-client/2.19.4.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden +``` + +Likely due to snapshot currently unavailable on the link + + + + +### @peterzhuamazon + + +Still pending publishing all the snapshots, expected. + + +### @peterzhuamazon + + +* Pending: https://github.com/opensearch-project/ml-commons/pull/4143 + + +--- + +# PR #4588: Onboarding new maven snapshots publishing to s3 (sql) + +**URL:** https://github.com/opensearch-project/sql/pull/4588 + +**Author:** @peterzhuamazon + +**Created:** 2025-10-16T18:32:29Z + +**State:** MERGED + +**Merged:** 2025-10-18T04:37:36Z + +**Changes:** +23 -20 (6 files) + +**Labels:** `enhancement`, `release`, `backport 3.3` + +**Assignees:** @peterzhuamazon + + +## Description + +### Description +Onboarding new maven snapshots publishing to s3 (sql) + +### Related Issues +https://github.com/opensearch-project/opensearch-build/issues/5360 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `build.gradle:None` + + +please move `mavenCentral()` forward, or CI could fail with "service unstable" when download dependencies. 
+``` +mavenCentral() +maven { url "https://ci.opensearch.org/ci/dbc/snapshots/maven/" } +``` + + +## General Comments + + +### @peterzhuamazon + + +* Pending https://github.com/opensearch-project/security/pull/5719 + + +### @peterzhuamazon + + +GEO also 3.4.0. + + +### @peterzhuamazon + + +Sql is depending on sql itself? +``` + +* What went wrong: +Could not determine the dependencies of task ':integ-test:integTestWithSecurity'. +> Could not resolve all files for configuration ':integ-test:zipArchive'. + > Could not resolve org.opensearch.plugin:opensearch-sql-plugin:3.1.0.0-SNAPSHOT. + Required by: + project :integ-test + > Could not resolve org.opensearch.plugin:opensearch-sql-plugin:3.1.0.0-SNAPSHOT. + > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/maven/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml. + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/maven/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden + > Could not resolve org.opensearch.plugin:opensearch-sql-plugin:3.1.0.0-SNAPSHOT. + > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/lucene/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml. + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/lucene/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden +``` + + +### @peterzhuamazon + + +* 2.19 backport : https://github.com/opensearch-project/sql/pull/4589 + + +### @ahkcs + + +> Sql is depending on sql itself? +> +> ``` +> +> * What went wrong: +> Could not determine the dependencies of task ':integ-test:integTestWithSecurity'. +> > Could not resolve all files for configuration ':integ-test:zipArchive'. +> > Could not resolve org.opensearch.plugin:opensearch-sql-plugin:3.1.0.0-SNAPSHOT. 
+> Required by: +> project :integ-test +> > Could not resolve org.opensearch.plugin:opensearch-sql-plugin:3.1.0.0-SNAPSHOT. +> > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/maven/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml. +> > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/maven/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden +> > Could not resolve org.opensearch.plugin:opensearch-sql-plugin:3.1.0.0-SNAPSHOT. +> > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/lucene/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml. +> > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/lucene/org/opensearch/plugin/opensearch-sql-plugin/3.1.0.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden +> ``` + +It's getting called here: +``` +zipArchive group: 'org.opensearch.plugin', name:'opensearch-sql-plugin', version: "${bwcVersion}-SNAPSHOT" +``` +In `integ-test/build.gradle` line 218 for BWC tests + + + +### @peterzhuamazon + + +Will publish 3.1.0.0-SNAPSHOT. + + +### @peterzhuamazon + + +Also requires ml3.3.0 + + +### @peterzhuamazon + + +All test passing. + + +### @peterzhuamazon + + +Hi @ryanL1997 @penghuo could you help approve and merge? + +Thanks. + + +--- + +# PR #4586: Allow renaming group-by fields to existing field names + +**URL:** https://github.com/opensearch-project/sql/pull/4586 + +**Author:** @yuancu + +**Created:** 2025-10-16T17:54:52Z + +**State:** MERGED + +**Merged:** 2025-10-24T02:34:48Z + +**Changes:** +143 -1 (2 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description + + +This PR fixes a bug in Calcite-enabled PPL queries where group-by fields cannot be aliased to their original field names, causing queries to fail with "field not found" errors. 
+ +When Calcite is enabled, PPL queries that use span functions with aliases matching the original field names fail with errors like: `field [value] not found; input fields are: [value0, count()]` + +**Affected Query Patterns:** +- `source=time_test | stats count() by span(value, 2000) as value` +- `source=time_test | stats count() by span(timestamp, 1h) as timestamp` + + +**Root Cause Analysis** + +The issue occurs during Calcite's aggregation processing: + +1. Input Dependencies: Calcite identifies that both the input field value and output alias value have the same name +2. Name Collision Resolution: To avoid conflicts, Calcite automatically renames fields by adding numeric suffixes (value → value0) +3. Reference Resolution Failure: Subsequent query processing still references the original name value, but Calcite has internally renamed it to value0, causing a "field not found" error + +**Solution Implementation** + +This PR implements a post-aggregation field renaming strategy that preserves intended aliases. + +### Related Issues +Resolves #4580 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1051` + + +can the `PlanUtil.getInputRefs` be used to replace this? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +when the `names.size` not equals to `reResolved.getLeft().size()`? 
seems the condition is always true + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +can you rename the var `names` to make it more meaningful + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +The lengths do not equal when a group key is not aliased -- under which circumstance `extractAliasLiteral` will return empty: +```java +private Optional extractAliasLiteral(RexNode node) { + if (node == null) { + return Optional.empty(); + } else if (node.getKind() == AS) { + return Optional.of((RexLiteral) ((RexCall) node).getOperands().get(1)); + } else { + return Optional.empty(); + } +``` + +Although it seems that all group keys are aliased in practice, this defense check was to prevent unintended future changes to avoid in-correspondent renaming. Should I remove it? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Renamed + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1051` + + +I think they serve different purposes. `PlanUtil.getInputRefs` returns all referred input refs. Besides, if a node refers multiple inputs, it will return all of them. Yet here I just want to check whether a node is an input ref (optionally aliased), keeping the node as is. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1018` + + +In `Registrar.registerExpression`, seems the other computed expression won't promise following the original order if there is expression duplication. + +But since our PPL only allow `span` expr in our group by and it cannot be combined with other `span` expr. This logic may be right and I cannot find any bad case so far. 
+ + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1045` + + +Is there any case that we have `DESCENDING, NULLS_FIRST, NULLS_LAST` in our `stats .. by ...` command + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1045` + + +No, I didn't manage to create any. It seems there is always a projection after sorting and before aggregation. + +E.g. +``` +LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(value=[$2]) + LogicalSort(sort0=[$2], dir0=[DESC-nulls-last]) +``` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1018` + + +I found a bad case: `stats count() by value, value, @timestamp`. I'll fix it. + +**Update**: Fixed by checking duplication + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4585: [Backport 2.19-dev] Add MAP_REMOVE internal function for Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4585 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-16T16:16:26Z + +**State:** MERGED + +**Merged:** 2025-10-18T04:20:45Z + +**Changes:** +495 -3 (6 files) + +**Assignees:** @ykmr1224 + + +## Description + +Backport e3ab9d09a075a093b90687019d4dbc61c7ff76ec from #4511. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4583: Add value type hint for derived aggregate group by field + +**URL:** https://github.com/opensearch-project/sql/pull/4583 + +**Author:** @qianheng-aws + +**Created:** 2025-10-16T08:44:20Z + +**State:** MERGED + +**Merged:** 2025-10-17T02:35:35Z + +**Changes:** +147 -61 (13 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Add value type hint for derived aggregate group by field + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4469 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:645` + + +why not just simplify method signature to +``` +private static T withValueTypeHint(T sourceBuilder, RelDataType groupType) +``` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:645` + + +Just using template `T` cannot call `userValueTypeHint` API. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4579: Support `chart` command in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4579 + +**Author:** @yuancu + +**Created:** 2025-10-16T02:58:37Z + +**State:** MERGED + +**Merged:** 2025-11-07T08:37:54Z + +**Changes:** +2335 -180 (44 files) + +**Labels:** `feature`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +The chart command returns an aggregation result that can be easily pivoted to a two-dimension table format. + + +**Work items** +**Difference between stats, timechart, and chart** + +- with `stats` + `chart` is conceptually similar to `stats` in that they all compute an aggregation value and then group them by a given criterion. The main differences lies in their output format. + + For example, for the query in introduction, we can rewrite it with stats: `... | stats count BY status, host`. It gives the following result: + + status | host | count + -- | -- | -- + 200 | www1 | 11835 + 200 | www2 | 11186 + 200 | www3 | 11261 + 400 | www1 | 233 + 400 | www2 | 257 + 400 | www3 | 211 + 403 | www2 | 228 + 404 | www1 | 244 + 404 | www2 | 209 + 404 | www3 | 237 + + Each field specified in BY clause becomes a separate column in the result table; each row is a unique combination of them. Whereas in `chart`, each unique status becomes a row; each individual value of host becomes a column. This format makes it easier to view and visualize the results. + + Similar to the syntax of `stats`, an equivalent expression of `... | CHART count OVER status BY host` is `... | CHART count BY status, host`. + + - It is worth noticing that `chart` will ignore documents with NULL in row split and in aggregation results, while `stats` will keep them. + +- with `timechart` + + The key difference between `chart` and `timechart` is that `timechart` leverages a default `@timestamp` field as the field to perform aggregation on. `... 
| timechart agg BY field` is conceptually equivalent to `... | CHART agg OVER _time BY field` + + The following table summarizes the differences between the three commands. + + |Feature|chart|stats|timechart| + |---|---|---|---| + |BY clause fields|Limited to two (row-split, column-split)|Multiple (3+ possible)|Always uses _time + one optional field| + |Primary purpose|Consolidated visualizations|Detailed statistical calculations|Time-based analysis| + |Output format|Table format optimized for visualization (Not implemented) |Row-based results|Table format time series visualization (Not implemented)| + |Best use case|Data comparisons across categories|Detailed data analysis|Trend analysis over time| + |X-axis control|Any field|N/A|Always _time| + + +### Related Issues +Resolves #399 + +### Implementation Walk-through + +Ideally, chart should pivot the result into a 2-dimension table. E.g. for the following table: + +| a | b | val | +|---|---|---| +| m | x | 3 | +| m | y | 4 | + +`| chart avg(val) by a, b` should make it a table like this: + +| a | x | y | +|---|---|---| +| m | 3 | 4 | + + +However, it seems dynamic pivoting is not supported in SQL/Calcite (see original discussion in https://github.com/opensearch-project/sql/issues/3965#issue-3281969676). Therefore, the result table for the implemented`chart` is like: + +| a | b | avg(val) | +|---|---|---| +| m | x | 3 | +| m | y | 4 | + +The pivoting can be performed in the front-end. + +The above operation is equivalent to `stats avg(val) by a, b` -- this is the case when parameters like `usenull`, `useother`, and `limit` is not involved in the result. + +When these parameters are involved, `chart` command will find the top-N categories of `b`, aggregating the rest to an `OTHER` category, and aggregating those whose `b` is null to a "NULL" category. This leads to the following implementation: + +1. normal aggregation based on a, b (equivalent to `stats agg_func by a, b`) +2. 
find out the top-N categories (unique values of column b) by aggregating on the above aggregation results + 1. aggregate on b + 2. sort on aggregation results + 3. number the rows +3. left join the ranked results with the original aggregation +4. keep rows whose row number is no greater than the limit, categorizing the rest to OTHER or NULL +5. Aggregate again because values categorized into OTHER or NULL need to be merged + +**Note**: + +This implementation did not reuse the implementation of timechart to circumvent some existing bugs. A following PR will merge their implementation as chart essentially is a superset of timechart in terms of functionality. + +**Future work items** +### Check List + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - COMMENTED + + +Code explanation. + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2446` + + +The fields are `[row-split, col-split, aggregation]` now + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2454` + + +Convert the column split to string so that they can be labels of columns once pivoted. This also guarantees that its type is compatible with `nullstr` and `otherstr`. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +`testChartWithNullAndLimit` covers this case. Without this line, it will number rows who don't have a column split if their aggregation result is great. 
+ + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +See explanations in #4594 + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2465` + + +aggregated: `[row-split, col-split, aggregation]` +ranked: `[col-split, grand-total, row-number]` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2512` + + +Final aggregation: to merge values in the `OTHER` categories. + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +Explain definition of top in doc, is it `stats dc by col | sort -dc, +col`? + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +column -> column_split + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +change doc, make it clearly. +* usenull=true only applie to column_split +* row_split should always be non-null value. + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +The column split field in the result will become strings -> +The fields generated by column splitting are converted to strings + + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:157` + + +I expect result should another row? +F 33 0 + +then, pivot table will be +gender 33 OTHER +M,1,2 +F,1,0 + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +syntax should be `limit=top10` + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:184` + + +add an example to demo convert column_split to string. + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:157` + + +Since we are not really doing pivoting, I think it's better to omit empty groups? This avoids a large sparse response and reduces traffic. Besides, other aggregations also don't return results for empty buckets. + +`timechart` also claims to omit those buckets: + +> Only combinations with actual data are included in the results - empty combinations are omitted rather than showing null or zero values. 
+ +If front-end wants to add it back, they can easily fill null or 0 to those missing groups. + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:None` + + +It's to keep the top-K categories (distinct column splits). + +E.g. +- `chart limit=1 count() by a b` keeps the top 1 b categories with most rows +- `chart limit=bottom3 sum(value) by a b` keeps the one b categories with minimum sum of values in its category. +- `chart limit=top2 min(value) by a b` keeps 2 b categories whose minimum value within its group are the smallest 2. +- `chart limit=bottom2 min(value) by a b` keep 2 b categories whose minimum value within its group are the largest 2. + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:None` + + +Fixed + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:None` + + +Fixed. row_split can actually contain `null`; it will be handled in the same manner as normal aggregations like `stats count() by a, b` where there exists null values in column a. + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:None` + + +Fixed. Thanks for the suggestion! + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:None` + + +Fixed. Thanks for double checking + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:184` + + +It's actually covered by example 3 and 4. Updated their descriptions. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml:None` + + +`timechart` has another sort operator upon the final aggregate operator to ensure the sequence. Does `chart` has to keep the same behavior? 
+ + +### @qianheng-aws on `docs/user/ppl/cmd/chart.rst:157` + + +``` + // Use zero-filling for count aggregations, standard result for others + if (valueFunctionName.equals("count")) { + return buildZeroFilledResult( + completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); + } else { + return buildStandardResult( + completeResults, topCategories, byFieldName, valueFunctionName, useOther, context); + } +``` + +`timechart` fills zero specially only for `count` agg and that's what makes the plan so complex for `count`. I cannot recall the reason why we do this, maybe just want to keep align with SPL? + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:157` + + +Yes, her aim was to align with SPL. In timechart's issue (#3965), it states: +> Post-Processing: The formatter layer transforms this "long" format into "wide" format by: +> - Filling missing values with 0 for hosts with no data in a time bucket + +However, I don't find filling 0 only for count is necessary considering that we are not really converting the output to a *wide* format. This is also inconsistent with the output of other aggregation functions, where the values for missing group combinations (`{F-33}` for the above case) are not filled. If front-end side is going to pivoting the result, it's better to let it fill the missing value as well; if they don't, it does not make much sense to manually create the empty group combinations just for count. + + + +### @yuancu on `integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml:None` + + +Good catch. I believe the added order is to align with SPL's behavior. The row and column names are indeed sorted in SPL. + +Yet I'm a little unsure whether it is necessary as SPL's doc did not explicit or guarantee any order of its output for chart and timechart. 
+ + +### @yuancu on `integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml:None` + + +I added the final reorder. I'll revert it if it's unnecessary. + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +> Null values in the row split are handled in the same way as normal aggregations. + +null value in row split should be ignored. + + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:None` + + +I suspect topk is not distinct columns splits. e.g. depend on metrics agg +* case-1, distinct columns +``` +| chart limit=1 count by id, depto + +## case-1, distinct columns +| stats dc by split_cols +``` + +* case-2, remove invalide rows, then distinct columns +``` +| chart limit=1 sum(age) as sum by id, depto + +## case-2, remove invalide rows, then distinct columns +| where isnumber(age) +| stats dc by split_cols +``` + + +### @penghuo on `docs/user/ppl/cmd/chart.rst:157` + + +chart and timechart should have same behavior on filling. +@yuancu could u sync with yulong to double confirm the frontend can fill results when pivoting. + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:None` + + +Sorry for missing the point. Double confirmed SPL's behavior, null values in row splits are indeed ignored. + +**Update**: Fixed; ignores events without a row split now + + +### @yuancu on `docs/user/ppl/cmd/chart.rst:157` + + +I discussed with yulong, filling 0 for empty group for count in front-end is not a problem. + +I'll leave the empty groups unfilled for now. + + +## General Comments + + +### @yuancu + + +> Thanks for the change! Please follow up on unifiy chart and timechart implementation in following PRs. + +Thanks @penghuo ! 
Raised #4755 for the unification + + +--- + +# PR #4577: [Backport 2.19-dev] Add MAP_APPEND internal function to Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4577 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-15T20:08:21Z + +**State:** MERGED + +**Merged:** 2025-10-15T23:05:30Z + +**Changes:** +557 -29 (9 files) + +**Assignees:** @ykmr1224 + + +## Description + +Backport cba8d0203f75bb7560906cabc36778f9b24f5479 from #4515. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4574: [Backport 2.19-dev]Fix sort push down into agg after project already pushed (#4546) + +**URL:** https://github.com/opensearch-project/sql/pull/4574 + +**Author:** @qianheng-aws + +**Created:** 2025-10-15T10:07:52Z + +**State:** MERGED + +**Merged:** 2025-10-16T02:31:42Z + +**Changes:** +79 -15 (9 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/5630119fd143915653de74b02f96d6d524909bbf from https://github.com/opensearch-project/sql/pull/4546. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4573: [Backport 2.19-dev] Partially backport #4378 excluding `shard_doc` + +**URL:** https://github.com/opensearch-project/sql/pull/4573 + +**Author:** @LantaoJin + +**Created:** 2025-10-15T10:06:30Z + +**State:** MERGED + +**Merged:** 2025-10-16T02:41:01Z + +**Changes:** +141 -167 (105 files) + + +## Description + +(cherry picked from #4378 commit 3e951474202bfe5586815305aaf72059a5ecd816) + +The purpose of partially backporting #4378 (excluding `shard_doc` related optimization) is to avoid the plan conflicts between 2.19-dev and main.
Since the default order of query results is solid (`shardIndex` then `doc` in Lucene, not like the `_shard_doc` of OpenSearch, no need to generate), no need to add `_doc` explicitly. +Ref https://github.com/apache/lucene/blob/8e8e37d9e94c290cf8d02e9f318e601baedf28bc/lucene/core/src/java/org/apache/lucene/search/TopDocs.java#L43-L45, + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4572: Fix push down failure for min/max on derived field + +**URL:** https://github.com/opensearch-project/sql/pull/4572 + +**Author:** @qianheng-aws + +**Created:** 2025-10-15T09:54:44Z + +**State:** MERGED + +**Merged:** 2025-10-17T02:42:41Z + +**Changes:** +26 -6 (3 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix push down failure for min/max on derived field + +https://github.com/opensearch-project/sql/pull/4281 introduced below code for MIN/MAX in `AggregateAnalyzer::createRegularAggregation` +``` + String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); + ExprType fieldType = helper.fieldTypes.get(fieldName); +``` +which will throw NPE for derived fields, and then fails to push down the agg. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4571, https://github.com/opensearch-project/sql/issues/4460 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4569: Use `_doc` + `_shard_doc` as sort tiebreaker to get better performance + +**URL:** https://github.com/opensearch-project/sql/pull/4569 + +**Author:** @LantaoJin + +**Created:** 2025-10-15T08:48:44Z + +**State:** MERGED + +**Merged:** 2025-10-15T17:45:29Z + +**Changes:** +24 -15 (3 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Before https://github.com/opensearch-project/sql/pull/4378, the sort in PIT search is +case 1: if no sort field specified, sort by `_doc` + `_id` (+ means "then"). (❎ could cause high memory issue) +case 2: if sort fields specified, sort by `fields`. (❎ paged results could miss or duplicate hits) +case 3: if sort fields specified and query contains a filter, sort by `_doc`. (❎ paged results could miss or duplicate hits) + +https://github.com/opensearch-project/sql/pull/4378 added the `_shard_doc` as sort tiebreaker with +case 1: if no sort field specified, sort by `_shard_doc`. (❎ performance regression) +case 2: if sort fields specified, sort by `fields` + `_shard_doc`.(❎ lower performance on low cardinality field) + +https://github.com/opensearch-project/sql/pull/4435 found performance regression in case 1 and partially revert the changes to +case 1: if no sort field specified, sort by `_doc` + `_id`. (❎ could cause high memory issue) +case 2: if sort fields specified, sort by `fields`. (❎ paged results could miss or duplicate hits) + +After this PR, we change the sort in PIT search to +case 1: if no sort field specified, sort by `_doc` + `_shard_doc`. ✅ +case 2: if sort fields specified, sort by `fields` + `_doc` + `_shard_doc`.✅ + +**RCA of performance regression**: +`_shard_doc` is not a stored field in index which will be generated in runtime when comparison. 
Computing `_shard_doc` per document is a high cost operation. But sorting by `_doc` then `_shard_doc` only generates `_shard_doc` when the `_doc` values tie. +Even in the case of user specified sort fields, we should sort by `fields` then `_doc` then `_shard_doc` to reduce the computing of `_shard_doc`. For example, if the sort field is a low cardinality field, e.g. `gender`, sorting by `gender` then `_doc` then `_shard_doc` generates `_shard_doc` for comparison only if the values of `gender` and `_doc` both tie. + +**This PR does not need to be backported to 2.19-dev since the `shard_doc` feature is only available since OS 3.3.0** + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java:214` + + +Does it matter if we duplicate fields in the sorting list? We could simplify/remove the below `else` logic by just always appending this, I would expect Lucene to optimize it in the background but I haven't measured it. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java:214` + + +Not sure whether Lucene will optimize duplicated `fields` or `_doc` in sorting, but a duplicated `_shard_doc` is definitely not allowed in OpenSearch Core. It does no harm to keep the restrictive check here. + + +## General Comments + + +### @anasalkouz + + +Can you share the performance benchmark for the 3 approaches? 
+ + +### @LantaoJin + + +> Can you share the performance benchmark for the 3 approaches? + +I haven't run the benchmark; the RCA was made by reading the code of Lucene and the OS `shard_doc` feature. + +The performance of `_doc` then `_shard_doc` is the same as `_doc` then `_id`, provided by @ahkcs on Oct 1st. (the case 1) + +For case 2, the current `fields` + `_doc` + `_shard_doc` is a further optimization upon `fields` + `_shard_doc` based on the above benchmark result and inference. + +Will rerun some benchmarks to double-check. + + +--- + +# PR #4568: [Backport 2.19-dev][FollowUp] Set 0 and negative value of subsearch.maxout as unlimited (#4534) + +**URL:** https://github.com/opensearch-project/sql/pull/4568 + +**Author:** @LantaoJin + +**Created:** 2025-10-15T08:45:31Z + +**State:** MERGED + +**Merged:** 2025-10-15T09:21:29Z + +**Changes:** +26 -54 (12 files) + + +## Description + +(cherry picked from #4534 commit de2fdc87db56c5e26a86c25713edff746d5e35ac) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4567: [Backport 2.19-dev] Check server status before starting Prometheus + +**URL:** https://github.com/opensearch-project/sql/pull/4567 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-15T06:18:34Z + +**State:** MERGED + +**Merged:** 2025-10-15T07:06:37Z + +**Changes:** +28 -2 (2 files) + + +## Description + +Backport 89dbc31d526e18c3a6b74e313241174dfc82824b from #4537. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4566: [Backport 2.19-dev]Including metadata fields type when doing agg/filter script push down (#4522) + +**URL:** https://github.com/opensearch-project/sql/pull/4566 + +**Author:** @qianheng-aws + +**Created:** 2025-10-15T05:07:37Z + +**State:** MERGED + +**Merged:** 2025-10-15T06:44:22Z + +**Changes:** +65 -18 (11 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/42a415fc3f3f439b535fb7ffd688b3d81f0244ff from https://github.com/opensearch-project/sql/pull/4522. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4562: Update PPL Command Documentation + +**URL:** https://github.com/opensearch-project/sql/pull/4562 + +**Author:** @ritvibhatt + +**Created:** 2025-10-15T01:24:18Z + +**State:** MERGED + +**Merged:** 2025-11-19T09:05:52Z + +**Changes:** +1457 -2461 (48 files) + +**Labels:** `documentation`, `v3.4.0` + + +## Description + +This PR standardizes the structure and content of PPL command documentation files to improve consistency and user experience. + +## Changes Made + + ### Documentation Structure Standardization + Implemented consistent section ordering across all PPL command files: + 1. **Description** - Clear explanation of command functionality + 2. **Syntax** - Command syntax with parameter details + 3. **Optional sections** - Behavior notes, configuration details, or usage guidance as needed + 4. **Examples** - Practical usage examples with expected output + 5. 
**Limitations** - Known constraints or considerations + +### Content Cleanup and Modernization + - **Removed version information** - Eliminated outdated version references (e.g., "Version: + 3.3.0") to keep documentation current and reduce maintenance overhead + - **Extracted aggregation functions** - Moved aggregation function documentation to dedicated + functions file (`/functions/aggregations.rst`) for better organization + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] +#4220 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Hi @ritvibhatt , thanks for the change. And I can signoff for `rex`, `regex`, and `parse`. And I just left some minor comments. + + +### @dai-chen - DISMISSED + + +Reviewed doc relate to `count(eval)` and `per_second` functions. Thanks! + + +### @Swiddis - DISMISSED + + +crashes my web viewer of the diff, lgtm + + +### @LantaoJin - APPROVED + + +LGTM, will keep eyes on CI. + + +## Review Comments + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:218` + + +I saw that in the `regex.rst`, there are some formatting changes in the "Limitation" section, and I was wondering do we need to apply the same here? + + +### @LantaoJin on `docs/user/ppl/cmd/append.rst:None` + + +Need a `|` before head of this line. + + +### @LantaoJin on `docs/user/ppl/cmd/appendcol.rst:None` + + +no need to keep the `|` any more if there is single line in description. 
`|` means starting from a new line + + +### @LantaoJin on `docs/user/ppl/cmd/dedup.rst:None` + + +ditto + + +### @LantaoJin on `docs/user/ppl/cmd/describe.rst:None` + + +should be `describe [dataSource.][schema.]` + + +### @LantaoJin on `docs/user/ppl/cmd/expand.rst:None` + + +why not bullet mark here? + + +### @LantaoJin on `docs/user/ppl/cmd/fillnull.rst:21` + + +if you delete new line, please add `|` at the beginning of each line. + + +### @LantaoJin on `docs/user/ppl/cmd/fillnull.rst:23` + + +ditto + + +### @LantaoJin on `docs/user/ppl/cmd/fillnull.rst:None` + + +delete last `=` + + +### @LantaoJin on `docs/user/ppl/cmd/fillnull.rst:None` + + +ditto and following + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:215` + + +this information (how to enable full capability of join types) totally lost + + +### @LantaoJin on `docs/user/ppl/cmd/lookup.rst:None` + + +delete last two `=` + + +### @LantaoJin on `docs/user/ppl/cmd/lookup.rst:None` + + +delete last one `=` + + +### @LantaoJin on `docs/user/ppl/cmd/patterns.rst:None` + + +add 4 `=` + + +### @LantaoJin on `docs/user/ppl/cmd/patterns.rst:None` + + +add more `=` + + +### @LantaoJin on `docs/user/ppl/cmd/patterns.rst:None` + + +delete one space between `With` and `option` + + +### @LantaoJin on `docs/user/ppl/cmd/rare.rst:None` + + +remove some `=` + + +### @LantaoJin on `docs/user/ppl/cmd/rare.rst:None` + + +delete the `|` + + +### @LantaoJin on `docs/user/ppl/cmd/reverse.rst:None` + + +delete some `=` + + +### @LantaoJin on `docs/user/ppl/cmd/rex.rst:None` + + +ditto + + +### @LantaoJin on `docs/user/ppl/cmd/rex.rst:None` + + +one less `=` required for this and follow examples + + +### @LantaoJin on `docs/user/ppl/cmd/sort.rst:17` + + +delete some `=` + + +### @LantaoJin on `docs/user/ppl/cmd/sort.rst:None` + + +add some `=` + + +### @LantaoJin on `docs/user/ppl/cmd/sort.rst:193` + + +ditto + + +### @LantaoJin on `docs/user/ppl/cmd/spath.rst:None` + + +`=` problem + + +### @LantaoJin on 
`docs/user/ppl/cmd/stats.rst:None` + + +`The argument of aggregation must be field.` can be removed + + +### @LantaoJin on `docs/user/ppl/cmd/subquery.rst:None` + + +`=` problem + + +### @LantaoJin on `docs/user/ppl/cmd/timechart.rst:None` + + +`=` problem + + +### @ritvibhatt on `docs/user/ppl/cmd/rex.rst:218` + + +I think the format is good, I removed the first sentence in limitations but let me know if you think format can be changed to match regex better + + +### @ritvibhatt on `docs/user/ppl/cmd/appendcol.rst:None` + + +Removed all the ```|``` where there was only a single line thanks! + + +## General Comments + + +### @aalva500-prog + + +Hi @ritvibhatt, thanks for the changes! Quick question: should we also update the documentation in `condition.rst` for recently added functions like `coalesce`? + + + +### @ritvibhatt + + +> Hi @ritvibhatt, thanks for the changes! Quick question: should we also update the documentation in `condition.rst` for recently added functions like `coalesce`? + +Hi @aalva500-prog, thanks for bringing that up! I have now updated the functions documentation as well + + +### @ahkcs + + +Hi @ritvibhatt , reviewed `bin`, `replace`, `fillnull` commands and I think it looks good, for `multisearch` command, can you rebase it due to the newly merged PR: https://github.com/opensearch-project/sql/pull/4512? Thanks! + + +### @LantaoJin + + +can u help to fix the typo in https://github.com/ritvibhatt/sql/blob/update-docs/docs/user/ppl/functions/condition.rst#regexp-match +``` +Usage: regex_match(string, pattern) returns .. +``` +change to +``` +Usage: regexp_match(string, pattern) returns ... +``` + + +### @ritvibhatt + + +> can u help to fix the typo in [ritvibhatt/sql@`update-docs`/docs/user/ppl/functions/condition.rst#regexp-match](https://github.com/ritvibhatt/sql/blob/update-docs/docs/user/ppl/functions/condition.rst?rgh-link-date=2025-11-19T04%3A13%3A13Z#regexp-match) +> +> ``` +> Usage: regex_match(string, pattern) returns .. 
+> ``` +> +> change to +> +> ``` +> Usage: regexp_match(string, pattern) returns ... +> ``` + +@LantaoJin Updated to be `regexp_match`! + + +--- + +# PR #4561: Update maven repo url + +**URL:** https://github.com/opensearch-project/sql/pull/4561 + +**Author:** @ahkcs + +**Created:** 2025-10-15T00:01:34Z + +**State:** MERGED + +**Merged:** 2025-10-15T03:09:59Z + +**Changes:** +16 -13 (3 files) + + +## Description + +### Description +Update maven repo url to the latest + +Revert the previous PR: https://github.com/opensearch-project/sql/pull/4558 +The actual root cause is that the link for ci repo has changed, see related issue +### Related Issues +https://github.com/opensearch-project/opensearch-build/issues/5360#issuecomment-3398576171 + +This URL for ci repo has changed + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4558: Updating maven repo url + +**URL:** https://github.com/opensearch-project/sql/pull/4558 + +**Author:** @ahkcs + +**Created:** 2025-10-14T21:44:52Z + +**State:** MERGED + +**Merged:** 2025-10-14T23:52:50Z + +**Changes:** +12 -15 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Updating maven repo url to follow other repos: +https://github.com/opensearch-project/index-management/blob/main/build.gradle#L73-L79 + + + +## Reviews + + +### @dai-chen - APPROVED + + +Is this because of some changes from OpenSearch build team? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +> Is this because of some changes from OpenSearch build team? 
+ +It seems like the previous link failed to fetch 2.19.0.0-snapshot from https://ci.opensearch.org/ci/dbc/snapshots/ repo +After syncing with @gaiksaya , I am updating this to follow other repo's behavior for 2.19-dev branch, may later apply to main as well if needed + + +### @ykmr1224 + + +Does this align with this? https://github.com/opensearch-project/opensearch-build/issues/5360 + + +### @ahkcs + + +> Does this align with this? [opensearch-project/opensearch-build#5360](https://github.com/opensearch-project/opensearch-build/issues/5360) + +The current 2.19-dev branch align with this issue but we lost access to this link since yesterday: https://ci.opensearch.org/ci/dbc/snapshots/org/opensearch/gradle/build-tools/2.19.0-SNAPSHOT/maven-metadata.xml +So we are falling back to using `aws.oss` repo as a workaround for now + + +--- + +# PR #4557: [Backport 2.19-dev] Add more examples to the `where` command doc + +**URL:** https://github.com/opensearch-project/sql/pull/4557 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-14T21:29:06Z + +**State:** MERGED + +**Merged:** 2025-10-17T07:26:21Z + +**Changes:** +134 -3 (1 files) + + +## Description + +Backport 02ee33e4a39b8e764dd74b1b4e47bb0e739b87d5 from #4457. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4556: Fix JsonExtractAllFunctionIT failure + +**URL:** https://github.com/opensearch-project/sql/pull/4556 + +**Author:** @ykmr1224 + +**Created:** 2025-10-14T21:20:13Z + +**State:** MERGED + +**Merged:** 2025-10-15T00:20:39Z + +**Changes:** +2 -4 (1 files) + +**Labels:** `maintenance`, `backport-failed` + + +## Description + +### Description +- Fix JsonExtractAllFunctionIT failure due to conflicting change: https://github.com/opensearch-project/sql/pull/4501 + - signature of `CalcitePlanContext.create` method was changed + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +The backport will require https://github.com/opensearch-project/sql/pull/4535 and https://github.com/opensearch-project/sql/pull/4548 to be merged first. + + +--- + +# PR #4555: [Backport 2.19-dev] [Enhancement] Error handling for illegal character usage in java regex named capture group + +**URL:** https://github.com/opensearch-project/sql/pull/4555 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-14T18:27:00Z + +**State:** MERGED + +**Merged:** 2025-10-15T10:12:25Z + +**Changes:** +285 -24 (6 files) + + +## Description + +Backport 0b7e86cd203059081941bc94599b03f1d7642497 from #4434. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4554: Enhance dynamic source clause to support metadata only filters + +**URL:** https://github.com/opensearch-project/sql/pull/4554 + +**Author:** @vamsimanohar + +**Created:** 2025-10-14T18:22:20Z + +**State:** MERGED + +**Merged:** 2025-10-28T16:17:06Z + +**Changes:** +200 -64 (2 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + +**Assignees:** @vamsimanohar + + +## Description + +### Description +This PR is a follow up PR for https://github.com/opensearch-project/sql/pull/4116 +Where we support dynamic source clauses with only metadata filters. + +We can already achieve this by writing +source = [`*`, filterIndex in (httpstatus) ] + +With this PR, this can be simplified to + +source = [filterIndex in (httpstatus) ] +If there is no source reference, this assumes all indices. + + + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4548: [Backport 2.19-dev] Add JSON_EXTRACT_ALL internal function for Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4548 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-14T15:37:00Z + +**State:** MERGED + +**Merged:** 2025-10-15T23:04:15Z + +**Changes:** +955 -0 (6 files) + +**Assignees:** @ykmr1224 + + +## Description + +Backport 9c97cfbc8c5540407522a623511acd71db11a775 from #4489. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +The backport CI failed even with latest code base, @ykmr1224 please fix it manually. + + +--- + +# PR #4546: Fix sort push down into agg after project already pushed + +**URL:** https://github.com/opensearch-project/sql/pull/4546 + +**Author:** @qianheng-aws + +**Created:** 2025-10-14T10:50:45Z + +**State:** MERGED + +**Merged:** 2025-10-15T08:40:41Z + +**Changes:** +81 -23 (10 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix sort push down into agg after project already pushed. + +As described in the issue, these codes +``` +IntStream.range(0, buckets.size()) + .mapToObj(fieldNames::get) + .filter(name -> !selected.contains(name)) + .forEach(name -> { + newBuckets.add(buckets.get(bucketNames.indexOf(name))); + newBucketNames.add(name); + }); +``` +has bug of finding incorrect bucket after a project pushed down into agg and then is not able to push down the sort. + +### Related Issues +Resolves #4529 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown4.json:None` + + +can you format this file to yaml? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.json:None` + + +can you change this to yaml + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.yaml:14` + + +what is `PROJECT->[count(), state]` meaning? The DSL seems no change, just remove `EnumerableCalc`? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.yaml:14` + + +Yes + + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown4.json:None` + + +done + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.json:None` + + +done + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4545: [Backport 2.19-dev] Update request builder after pushdown sort into agg buckets (#4541) + +**URL:** https://github.com/opensearch-project/sql/pull/4545 + +**Author:** @LantaoJin + +**Created:** 2025-10-14T10:15:48Z + +**State:** MERGED + +**Merged:** 2025-10-15T06:10:05Z + +**Changes:** +74 -15 (4 files) + + +## Description + +(cherry picked from #4541 commit fe6247277f370bad84f5a70f6445f7dc9774e207) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4544: Make composite bucket size configurable + 
+**URL:** https://github.com/opensearch-project/sql/pull/4544 + +**Author:** @LantaoJin + +**Created:** 2025-10-14T10:03:56Z + +**State:** MERGED + +**Merged:** 2025-10-17T02:39:12Z + +**Changes:** +147 -18 (15 files) + +**Labels:** `enhancement`, `aggregation`, `backport-manually`, `backport-failed`, `performance`, `backport 2.19-dev` + + +## Description + +### Description +Make composite bucket size configurable with configuration: `plugins.query.buckets` +The default value is same as `plugins.query.size_limit` (10000). + +### Related Issues +Resolves #4517 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +cc @penghuo @yuancu + + +--- + +# PR #4542: [Backport 2.19-dev] Fix percentile bug + +**URL:** https://github.com/opensearch-project/sql/pull/4542 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-14T08:53:29Z + +**State:** MERGED + +**Merged:** 2025-10-15T10:12:13Z + +**Changes:** +21 -1 (2 files) + + +## Description + +Backport 8de0386bfd96813be5bcc5b8fdd8e312568e8a7f from #4539. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4541: Update request builder after pushdown sort into agg buckets + +**URL:** https://github.com/opensearch-project/sql/pull/4541 + +**Author:** @LantaoJin + +**Created:** 2025-10-14T08:49:58Z + +**State:** MERGED + +**Merged:** 2025-10-14T10:10:28Z + +**Changes:** +74 -15 (4 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +The PushdownContext is not cloned after creating the action with `pushdownSortIntoAggBuckets()`. It could make the `OpenSearchRequestBuilder` out of date, then lead to incorrect results described #4529 + +### Related Issues +Resolves #4529 and #4540 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4539: Fix percentile bug + +**URL:** https://github.com/opensearch-project/sql/pull/4539 + +**Author:** @xinyual + +**Created:** 2025-10-14T05:37:29Z + +**State:** MERGED + +**Merged:** 2025-10-14T08:41:08Z + +**Changes:** +21 -1 (2 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Fix the mirror bug in percentile UDAF. 
+ +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] +#4460 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java:None` + + +Does this case cover the scenario where the argument of percentile is a decimal, as in the original issue? + +``` +source=opensearch_dashboards_sample_data_logs +| eval RamMB=CEIL(machine.ram/1000000.0) +| stats bucket_nullable=false MIN(RamMB) as Min_RAM_MB, percentile(RamMB, 80) as P80_RAM_MB +``` + + +### @xinyual on `integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java:None` + + +Thanks for your notification. Already changed the IT. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4537: Check server status before starting Prometheus + +**URL:** https://github.com/opensearch-project/sql/pull/4537 + +**Author:** @LantaoJin + +**Created:** 2025-10-14T04:49:39Z + +**State:** MERGED + +**Merged:** 2025-10-14T10:24:52Z + +**Changes:** +28 -2 (2 files) + +**Labels:** `infrastructure`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Check server status before starting Prometheus: +Both integ-test and doc-test need to start an OpenSearch cluster, which depends on a running Prometheus server. Sometimes we need to run integration tests in an IDE, but the OpenSearch cluster fails to start because a Prometheus server is already running. 
+ +### Related Issues +Resolves #4536 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +It may continuously fail to start prometheus when there are other programs also using prometheus at a different port. But this is a very minor case and can be ignored. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +> It may continuously fail to start prometheus when there are other programs also using prometheus at a different port. But this is a very minor case and can be ignored. + +the port is fixed in prometheus.yml +``` +static_configs: + - targets: ["localhost:9090"] +``` + + +--- + +# PR #4535: [Backport 2.19-dev] Add configurable sytem limitations for `subsearch` and `join` command (#4501) + +**URL:** https://github.com/opensearch-project/sql/pull/4535 + +**Author:** @LantaoJin + +**Created:** 2025-10-14T02:58:58Z + +**State:** MERGED + +**Merged:** 2025-10-15T08:24:19Z + +**Changes:** +1723 -80 (53 files) + + +## Description + +(cherry picked from #4501 commit fddbb705a6aeae138915e2174d5d7ea3ccbd3e9e) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4534: [FollowUp] Set 0 and negative value of subsearch.maxout as unlimited + +**URL:** https://github.com/opensearch-project/sql/pull/4534 + +**Author:** @LantaoJin + +**Created:** 2025-10-14T02:48:34Z + +**State:** MERGED + +**Merged:** 2025-10-14T04:29:03Z + +**Changes:** +28 -54 (12 files) + +**Labels:** `enhancement`, `backport-manually`, 
`backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Follow-up of #4501: treat both 0 and negative values of subsearch.maxout as unlimited, +but only call out `0` as unlimited in the user doc. + +Also, fix the merge conflicts. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4531: Add `per_minute`, `per_hour`, `per_day` function support + +**URL:** https://github.com/opensearch-project/sql/pull/4531 + +**Author:** @dai-chen + +**Created:** 2025-10-13T21:38:30Z + +**State:** MERGED + +**Merged:** 2025-10-24T16:49:33Z + +**Changes:** +402 -10 (9 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR extends `timechart` command's `per_*` function support (introduced in https://github.com/opensearch-project/sql/pull/4464) by adding three additional `per_*` functions that normalize metrics to different time units: + +- per_minute(field): Calculates the per-minute rate (sum-per-second multiplied by 60) +- per_hour(field): Calculates the per-hour rate (sum-per-second multiplied by 3600) +- per_day(field): Calculates the per-day rate (sum-per-second multiplied by 86400) + +**TODO**: After https://github.com/opensearch-project/sql/issues/4550 is resolved, migrate from the current second-based approach to millisecond-based calculations, e.g., `timestampdiff(MILLISECOND, 
span_start, span_end)` to support millisecond span. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4350 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - DISMISSED + + +Hi @dai-chen , thanks for the change, and it is LGTM. Just left a very minor question on the doc format. + + +### @ykmr1224 - DISMISSED + + +LGTM other than minor comment. + + +## Review Comments + + +### @RyanL1997 on `docs/user/ppl/cmd/timechart.rst:72` + + +Is this change expected? By removing the section header? + + +### @dai-chen on `docs/user/ppl/cmd/timechart.rst:72` + + +I was thinking make it lower level section. Changed it back and removed version note to align with our new doc standard in https://github.com/opensearch-project/sql/pull/4531/commits/28bb1a91a301018007ce7dd7a2d4d2c6995df9b0. Thanks! + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/ast/tree/TimechartTest.java:None` + + +nit: Is it possible to make it a constant to reduce duplicates? + + +### @dai-chen on `core/src/test/java/org/opensearch/sql/ast/tree/TimechartTest.java:None` + + +Extracted into a single argument method in https://github.com/opensearch-project/sql/pull/4531/commits/b7acc98006828ca289574932240214d35cff228b. Thanks! + + +## General Comments + + +### @dai-chen + + +@RyanL1997 @ykmr1224 Addressed comments and merged from main. + +Waiting for CI complete. Please re-approve. Thanks! 
+ + +--- + +# PR #4530: [Backport 2.19-dev] Add `per_second` function support for `timechart` command + +**URL:** https://github.com/opensearch-project/sql/pull/4530 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-13T19:52:49Z + +**State:** MERGED + +**Merged:** 2025-10-15T18:30:43Z + +**Changes:** +622 -14 (17 files) + +**Labels:** `enhancement`, `PPL` + + +## Description + +Backport 4d416dbd927ce853a878be22175f13d5b2204f29 from #4464. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +``` + > Could not resolve org.opensearch.gradle:build-tools:2.19.0-SNAPSHOT. + > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/org/opensearch/gradle/build-tools/2.19.0-SNAPSHOT/maven-metadata.xml. + > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/org/opensearch/gradle/build-tools/2.19.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden +``` +The CI is failing on the getting the snapshot. Seems like there are some issues for the maven repo. + + +### @dai-chen + + +> ``` +> > Could not resolve org.opensearch.gradle:build-tools:2.19.0-SNAPSHOT. +> > Unable to load Maven meta-data from https://ci.opensearch.org/ci/dbc/snapshots/org/opensearch/gradle/build-tools/2.19.0-SNAPSHOT/maven-metadata.xml. +> > Could not GET 'https://ci.opensearch.org/ci/dbc/snapshots/org/opensearch/gradle/build-tools/2.19.0-SNAPSHOT/maven-metadata.xml'. Received status code 403 from server: Forbidden +> ``` +> +> The CI is failing on the getting the snapshot. Seems like there are some issues for the maven repo. + +Yeah, retried but failed again. Let me wait and retry after a while. Thanks! 
+ + +--- + +# PR #4528: [Backport 2.19-dev] Add MAP_CONCAT internal function + +**URL:** https://github.com/opensearch-project/sql/pull/4528 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-13T19:04:50Z + +**State:** MERGED + +**Merged:** 2025-10-13T22:03:12Z + +**Changes:** +183 -0 (3 files) + + +## Description + +Backport ef783f142830e5be5f91767cd60dcf1317480ab3 from #4477. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4524: [Backport 2.19-dev] Fix mapping after aggregation push down #4500 + +**URL:** https://github.com/opensearch-project/sql/pull/4524 + +**Author:** @qianheng-aws + +**Created:** 2025-10-13T02:57:58Z + +**State:** MERGED + +**Merged:** 2025-10-13T06:01:20Z + +**Changes:** +74 -34 (8 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/9257889a17f7301523a586c91833fa8331f4ef73 from https://github.com/opensearch-project/sql/pull/4500. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4523: [Backport 2.19-dev] Implement validation for case statement in count(eval) expression + +**URL:** https://github.com/opensearch-project/sql/pull/4523 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-13T02:25:18Z + +**State:** MERGED + +**Merged:** 2025-10-13T06:01:59Z + +**Changes:** +51 -1 (2 files) + + +## Description + +Backport f8767e0926e3cb41d264fc1292c3718f88e98a7b from #4520. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4522: Including metadata fields type when doing agg/filter script push down + +**URL:** https://github.com/opensearch-project/sql/pull/4522 + +**Author:** @qianheng-aws + +**Created:** 2025-10-11T09:55:47Z + +**State:** MERGED + +**Merged:** 2025-10-14T09:42:45Z + +**Changes:** +65 -30 (11 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Including metadata fields type when doing agg/filter script push down, as to avoid NPE when there is metadata fields in the script. + +### Related Issues +Resolves #4513 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java:157` + + +is this issue existing in v2? + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4513.yml:41` + + +no issue for ppl `source=test | eval id_int = cast(_id as int) | sort id_int`? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java:157` + + +No, v2 script doesn't rely on the field types. + + +### @qianheng-aws on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4513.yml:41` + + +The sort operator on derived field isn't able to be pushed down for now. 
+ + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4513.yml:41` + + +then what if `source=test | eval id_int = cast(_id as int) | stats count() by span(id_int, 100)` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4520: Implement validation for case statement in count(eval) expression + +**URL:** https://github.com/opensearch-project/sql/pull/4520 + +**Author:** @yuancu + +**Created:** 2025-10-11T02:56:46Z + +**State:** MERGED + +**Merged:** 2025-10-13T02:24:51Z + +**Changes:** +51 -1 (2 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description + +`count(eval( {condition} ))` is used to count rows that satisfies a certain criteria. It is translated to a CASE statement when visiting AST: +```java + // AstExpressionBuilder.java + @Override + public UnresolvedExpression visitEvalExpression(EvalExpressionContext ctx) { + /* + * Rewrite "eval(p)" as "CASE WHEN p THEN 1 ELSE NULL END" so that COUNT or DISTINCT_COUNT + * can correctly perform filtered counting. + * Note: at present only eval() inside counting functions is supported. + */ + UnresolvedExpression predicate = visit(ctx.logicalExpression()); + return AstDSL.caseWhen(null, AstDSL.when(predicate, AstDSL.intLiteral(1))); + } +``` + +But the validation of its parameter is skipped since it is later directly converted to a CASE call: + +```java + // CalciteRexNodeVisitor.java + @Override + public RexNode visitCase(Case node, CalcitePlanContext context) { + ... + return context.rexBuilder.makeCall(SqlStdOperatorTable.CASE, caseOperands); + } +``` + +This PR checks the type of the condition and throws an `ExpressionEvaluationException` exception when the condition of case are not of boolean types. + +### Related Issues +Resolves #4272 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4515: Add MAP_APPEND internal function to Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4515 + +**Author:** @ykmr1224 + +**Created:** 2025-10-10T23:24:45Z + +**State:** MERGED + +**Merged:** 2025-10-15T20:08:07Z + +**Changes:** +557 -29 (9 files) + +**Labels:** `PPL`, `feature`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Add MAP_APPEND internal function to Calcite PPL + - This function is needed to implement `spath` function to merge existing dynamic fields and newly extracted fields by `spath` + - Internally share logic with `mvappend` to align the behavior + - All the items will be array for consistency +- This function is internal and won't be exposed as PPL function + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4433 +- https://github.com/opensearch-project/sql/issues/4112 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLRelNodeIntegTestCase.java:32` + + +This class is overlapping with other PRs, and will be eliminated by rebase once one is merged. 
+ + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendCore.java:18` + + +Clarify null/empty semantics in Javadoc for future readers + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendCore.java:18` + + +Added comment. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4512: Fixes for `Multisearch` and `Append` command + +**URL:** https://github.com/opensearch-project/sql/pull/4512 + +**Author:** @ahkcs + +**Created:** 2025-10-10T18:59:18Z + +**State:** MERGED + +**Merged:** 2025-10-28T20:23:01Z + +**Changes:** +171 -251 (9 files) + +**Labels:** `bug`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +1. Type Conflict Handling + - Changed behavior: Type conflicts now throw IllegalArgumentException instead of auto-renaming fields + - Modified: SchemaUnifier.java - Removed automatic field renaming logic (e.g., age → age0) + - Updated Tests: + - CalciteMultisearchCommandIT.testMultisearchWithDirectTypeConflict - Now expects exception + - CalcitePPLAppendCommandIT.testAppendWithConflictTypeColumn - Now expects exception + - Documentation: Updated multisearch.rst and append.rst to reflect new behavior and add Limitations section + + 2. Timestamp Interleaving + - Modified: CalciteRelNodeVisitor.findTimestampField() - Now only detects @timestamp field for timestamp interleaving + - Other timestamp fields (_time, timestamp, time) are no longer used for interleaving + + 3. 
Documentation Fixes + - multisearch.rst: + - Added Limitations section + - Removed redundant Example 4 (Handling Empty Results) + - Removed Example 6 (type-conflict resolution) + - append.rst: + - Added Limitations section + - Removed Example 3 (type conflict example no longer valid) + +### Related PRs + +#4332 #4123 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:None` + + +I think we should rather raise error from here. (Then we don't need to check again) +Later we would implement a logic to generalize types for the same field name here. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:None` + + +Thanks for the suggestion! I think it makes sense, moved the location to raise error + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:76` + + +We recently find another issue of type conflicts here. RelDataType evaluates the hash equality by its digested string as well. For example, "INTEGER" is not equal to "INTEGER NOT NULL". A quick fix would be aligning the same SqlType to be nullable. Ideally it won't affect the data type resolution while execution. cc @xinyual + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:76` + + +We can consider to allow same SqlTypeName but with different nullability to be merged here. + + +### @songkant-aws on `docs/user/ppl/cmd/append.rst:30` + + +Understand the intention here. Strong schema engine like SQL restricts the type to be the same. Some weak schema engine resolves types at runtime and doesn't care the data type. I think it's not easy to make it compatible. + +Not sure what's better user experience and customer expectation here. Does user accept this behavior or expect to union anyway? 
cc @LantaoJin + + +### @ahkcs on `docs/user/ppl/cmd/append.rst:30` + + +We are planning to enable permissive mode in the future: #4349 to support schema merging with type conflicts, in order to avoid breaking changes in the future, we are marking this as a limitation now instead of using a workaround. cc @penghuo + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:76` + + +Thanks for the suggestion! I have updated the implementation to allow same SqlTypeName but with different nullability to be merged + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1816` + + +issue (non-blocking): Should we think about indices with different timestamp field names? + + +### @Swiddis on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java:None` + + +suggestion: This error message isn't very clear + +Users might not know what schema unification means, and we don't say the nature of the conflict. I'd like to see something more like: + +``` +Unable to process column 'age' due to incompatible types: 'integer' and 'text'. +``` + + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1816` + + +Currently it is set as a limitation: we want to only support @timestamp + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java:None` + + +Updated error message + + +### @songkant-aws on `docs/user/ppl/cmd/append.rst:30` + + +Got it. Thanks for the change. + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:95` + + +There is another concern of using this method to allow type merge. If index A's RelDataType 'INTEGER NOT NULL' is put to unified schema, index B's same name RelDataType 'INTEGER' will be merged silently. Index B's column values could contain NULL values. 
+ +The generated code could ignore the null check because the merged unified schema has 'INTEGER NOT NULL'. It will probably throw NPE when merging index B's NULL values. We could write some query to double check if we can reproduce this scenario. + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java:95` + + +I tried some queries in my local test. Haven't seen such NPE errors yet. Not sure if there is an edge case, but for now we can leave it as is until a fix is needed. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4511: Add MAP_REMOVE internal function for Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4511 + +**Author:** @ykmr1224 + +**Created:** 2025-10-10T16:59:11Z + +**State:** MERGED + +**Merged:** 2025-10-16T16:16:10Z + +**Changes:** +495 -3 (6 files) + +**Labels:** `PPL`, `feature`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Add internal MAP_REMOVE function for Calcite PPL + - This function takes map and multiple keys to eliminate keys in single call. + - This will be used for dynamic fields implementation, where we need to eliminate duplicate fields from `_MAP` to keep static/dynamic fields consistent. + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4433 +- https://github.com/opensearch-project/sql/issues/4112 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +could we marked field as removed instead of really remove it from map? 
we can evaluate it during perf test stage. + + +### @ykmr1224 + + +> could we marked field as removed instead of really remove it from map? we can evaluate it during perf test stage. + +It should be possible, but it would complicate the map related operations, since we need several operations to consider the removed fields. + + +--- + +# PR #4510: [Backport 2.19-dev] Add data anonymizer for spath command + +**URL:** https://github.com/opensearch-project/sql/pull/4510 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T16:29:47Z + +**State:** MERGED + +**Merged:** 2025-10-10T18:34:14Z + +**Changes:** +29 -0 (3 files) + + +## Description + +Backport fdb09e86c8b6c4f64babc0609f13015f351d28f6 from #4479. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4509: [Backport 2.19-dev] Support eval returns decimal division result instead of integer + +**URL:** https://github.com/opensearch-project/sql/pull/4509 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T16:25:03Z + +**State:** MERGED + +**Merged:** 2025-10-11T02:09:48Z + +**Changes:** +201 -60 (6 files) + + +## Description + +Backport b170cf121c605a32148fd3e5d1d2100fff1fba35 from #4440. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4506: [Backport 3.3] Revert partial of #4401 + +**URL:** https://github.com/opensearch-project/sql/pull/4506 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T14:49:28Z + +**State:** MERGED + +**Merged:** 2025-10-10T15:19:37Z + +**Changes:** +0 -1 (1 files) + +**Labels:** `bug` + + +## Description + +Backport 095e8cf6b916902e5e0c4a94e3f1eededc3629da from #4503. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4505: [Backport 2.19-dev] Revert partial of #4401 + +**URL:** https://github.com/opensearch-project/sql/pull/4505 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T14:49:26Z + +**State:** MERGED + +**Merged:** 2025-10-10T16:30:03Z + +**Changes:** +0 -1 (1 files) + + +## Description + +Backport 095e8cf6b916902e5e0c4a94e3f1eededc3629da from #4503. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4504: [Backport 3.3] Fix join type ambiguous issue when specify the join type with sql-like join criteria + +**URL:** https://github.com/opensearch-project/sql/pull/4504 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T13:57:06Z + +**State:** MERGED + +**Merged:** 2025-10-10T14:41:09Z + +**Changes:** +23 -1 (2 files) + +**Labels:** `bug` + + +## Description + +Backport 3d2043d4d02d035b2cd41345823535545d4e1f16 from #4474. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4503: Revert partial of #4401 + +**URL:** https://github.com/opensearch-project/sql/pull/4503 + +**Author:** @LantaoJin + +**Created:** 2025-10-10T13:52:49Z + +**State:** MERGED + +**Merged:** 2025-10-10T14:49:13Z + +**Changes:** +0 -1 (1 files) + +**Labels:** `bug`, `data-correctness`, `backport 2.19-dev`, `backport 3.3` + + +## Description + +### Description +`./gradlew ':integ-test:integTest' --tests 'org.opensearch.sql.calcite.remote.CalciteStatsCommandIT.testStatsNestedDoubleValue'` failed with +``` + Expected: iterable with items [[60.342]] in any order + but: not matched: <[60.34199]> +``` +due to https://github.com/opensearch-project/sql/pull/4401 + +Fixing: All numeric computation in runtime should use `Double` instead of `BigDecimal`. Revert partial of #4401 + +### Related Issues +Resolves #4502 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @qianheng-aws - APPROVED + + +no test? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +> no test? 
+ +`org.opensearch.sql.calcite.remote.CalciteStatsCommandIT.testStatsNestedDoubleValue` + + +--- + +# PR #4501: Add configurable sytem limitations for `subsearch` and `join` command + +**URL:** https://github.com/opensearch-project/sql/pull/4501 + +**Author:** @LantaoJin + +**Created:** 2025-10-10T08:59:28Z + +**State:** MERGED + +**Merged:** 2025-10-14T02:23:12Z + +**Changes:** +1711 -64 (52 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Add two configurable limitations for PPL. +1. plugins.ppl.subsearch.maxout (default value 10000, similar to `maxout` in [subsearch], [ref](https://help.splunk.com/en/splunk-enterprise/administer/admin-manual/10.0/configuration-file-reference/10.0.1-configuration-file-reference/limits.conf#subsearch-0)) +2. plugins.ppl.join.subsearch_maxout (default value 50000, similar to `subsearch_maxout` in [join], [ref](https://help.splunk.com/en/splunk-enterprise/administer/admin-manual/10.0/configuration-file-reference/10.0.1-configuration-file-reference/limits.conf#join-0)) + +### Related Issues +Resolves #3731 and #4430 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:209` + + +nit, revert unnecessary format change. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +nit, is it possible to avoid access private method? 
+ +2cents, Add a frame in CalcitePlanContext, frame is boundary of subsearch, and define limit on frame. When visit subsearch, append LogicalSystemLimit to subsearch on each frame. + + +### @penghuo on `docs/user/ppl/admin/settings.rst:None` + + +Remove Rollback, https://github.com/opensearch-project/sql/pull/4449 fix it. + + +### @penghuo on `docs/user/ppl/admin/settings.rst:293` + + +What if set to 0? Join/Subquery will be optimzied by Calcite? + + +### @LantaoJin on `docs/user/ppl/admin/settings.rst:293` + + +For now, I followed the standard behaviour in database: +``` +select * from t_outer where exists (select 1 from t_inner where t_outer.id = t_inner.id limit 0); +select * from t_outer where id in (select id from t_inner limit 0); +select * from t_outer where id = (select id from t_inner limit 0); +select * from t_outer where id = (select count(*) from t_inner limit 0); +``` +All above queries return empty in SQL (postgresql). + +The implementation is here https://github.com/opensearch-project/sql/pull/4501/files#diff-e5198d773af75bf3173ef25676a2803a0091cb51e32d6ae30241273519d30261R601-R605 + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +> nit, is it possible to avoid access private method? + +I don't think so. + +When visit the subsearch side (right in join for example), the right plan was pushed to stack. +``` +public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) { + return unresolved.accept(this, context); + } +``` +`RelBuilder.pop()` is private either. So we don't have a way to replace it. 
+ +Here was my previous try code for join +``` + public RelNode visitJoin(Join node, CalcitePlanContext context) { + // visit the main side + analyze(node.getLeft(), context); + if (context.sysLimit.joinSubsearchLimit() >= 0) { + // add join.subsearch_maxout limit to subsearch side + RelNode withLimit = context.relBuilder.with( + analyze(node.getRight(), context), + r -> LogicalSystemLimit.create( + SystemLimitType.JOIN_SUBSEARCH_MAXOUT, + r.peek(), + r.literal(context.sysLimit.joinSubsearchLimit()))); + context.relBuilder.push(withLimit); // push the new subsearch plan + } else { + // visit the subsearch side + analyze(node.getRight(), context); + } +``` + +The code use `relBuilder.with()`, but the first parameter `analyze(node.getRight(), context)` will push the subsearch to stack, and the `with()` method push it twice. +``` + /** Evaluates an expression with a relational expression temporarily on the + * stack. */ + public E with(RelNode r, Function fn) { + try { + push(r); + return fn.apply(this); + } finally { + stack.pop(); + } + } +``` +1. push left plan by `analyze(node.getLeft(), context)`, stack size is 1 +2. push right plan by the first parameter of `with`(`analyze(node.getRight(), context)`), stack size is 2 +3. push duplicated right plan by `push` in `with`, stack size is 3 +4. pop duplicated right plan by `pop` in `with`, stack size is 2 +5. push new right plan by `context.relBuilder.push(withLimit)`, stack size is 3 (incorrect) + + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:209` + + +Didn't get the point. This change is necessary, from `context.querySizeLimit` to `context.sysLimit.querySizeLimit()`. + + +### @LantaoJin on `docs/user/ppl/admin/settings.rst:None` + + +done + + +### @LantaoJin on `docs/user/ppl/admin/settings.rst:293` + + +What's your thoughts, set both `0` and negative value to unlimited? 
@penghuo + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Does it work by using `relbuilder.build() + relbuilder.push(newTop)`? `relbuilder.build()` will do pop while public. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +[question]Will there be case that there is join or union in subsearch? In those case there will be more than 1 input for the specific operators? If so, the current code will construct incorrect plan. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +SQL support correlate condition for in subquery or scalar subquery. So Calcite should support them as well. +e.g. +``` +SELECT * FROM EMPLOYEE WHERE location in (select location from DEPART where EMPLOYEE.dept = DEPART.name) limit 1 +``` + +If there is correlate condition for in or scalar subsearch, shall we do similar operation like above? + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:104` + + +Should there also be explaining tests for subsearch? + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java:None` + + +Should the minimum be set to `-1` instead? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:104` + + +added + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java:None` + + +fixed + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Sync offline. We still cannot use `relbuilder.build() + relbuilder.push(newTop)` since it will empty the `fields of Frame`. 
+``` + private void replaceTop(RelNode node) { + final Frame frame = stack.pop(); + stack.push(new Frame(node, frame.fields)); // <--- frame.fields will be kept all the time + } +``` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Fixed in latest commit. For BiRel or SetOp, just return. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Added the same logic for correlated in-subquery. For correlated scalar-subquery, since there is always an aggregation will be perform in subquery, sysLimit is not necessary. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +How does SQL Join translate to RelNode? It use the private method? + + +### @penghuo on `docs/user/ppl/admin/settings.rst:293` + + +discussed offline, 0 and -1 means unlimited. + + +### @LantaoJin on `docs/user/ppl/admin/settings.rst:293` + + +> discussed offline, 0 and -1 means unlimited. + +sure, let me update the code and doc to +`0` means unlimited, and minValue=0 in Settings + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4500: Fix mapping after aggregation push down + +**URL:** https://github.com/opensearch-project/sql/pull/4500 + +**Author:** @qianheng-aws + +**Created:** 2025-10-10T08:55:56Z + +**State:** MERGED + +**Merged:** 2025-10-13T02:42:51Z + +**Changes:** +75 -34 (8 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Allow type updating/overriding when pushing aggregation and it has derived fields with the same name as fields in source. + +Also fix `WidthBucketFunction` to return correct type if the field is type of TIME/DATE/TIMESTAMP. 
+ +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4115, https://github.com/opensearch-project/sql/issues/3458 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM. Can you link this PR to #3458 as well? It solves the mismatched type mapping when calcite is disabled. + + +## Review Comments + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4415.yml:42` + + +what's the result without this patch? is it a test about overwrite existing type? if yes, better to match the schema too. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java:88` + + +q: should we check `Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED` first? + + +### @LantaoJin on `opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java:None` + + +what is the issue 4413? can you attach the link? + + +### @qianheng-aws on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4415.yml:42` + + +Corrected the issue link, https://github.com/opensearch-project/sql/issues/4115. It will throw exception without this PR + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java:88` + + +This is a bug actually. We'd better always have this change despite of what this configuration is. 
+ + +### @qianheng-aws on `opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java:None` + + +Done + + +### @qianheng-aws on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4415.yml:42` + + +Also add the schema verification in the commit https://github.com/opensearch-project/sql/pull/4500/commits/acecdd1d3a62a6205419832ea41efe1505829473 + + +## General Comments + + +### @LantaoJin + + +can you update the `resolve # ` section? why resolve a merged pr? you mean a followup? + + +--- + +# PR #4498: [Backport 2.19-dev]Fallback to sub-aggregation if composite aggregation doesn't support #4413 + +**URL:** https://github.com/opensearch-project/sql/pull/4498 + +**Author:** @qianheng-aws + +**Created:** 2025-10-10T06:45:58Z + +**State:** MERGED + +**Merged:** 2025-10-10T08:52:17Z + +**Changes:** +400 -119 (18 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/f7be0128ae9661189deaf8a81d4b35a736742e04 from https://github.com/opensearch-project/sql/pull/4413. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4497: PPL tostring() implementation issue #4492 + +**URL:** https://github.com/opensearch-project/sql/pull/4497 + +**Author:** @asifabashar + +**Created:** 2025-10-10T05:54:16Z + +**State:** MERGED + +**Merged:** 2025-11-05T19:02:33Z + +**Changes:** +592 -0 (9 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description +Is your feature request related to a problem? +SPL allows tostring which implemented here. +part of RFC #4287 - tostring implementation + +What solution would you like? +Function: tostring(,) +Description +This function converts a value to a string. If the value is a number, this function reformats it as a string. 
If the value is a Boolean value, it returns the corresponding string value, "True" or "False". +Usage +You can use this function with the eval commands and as part of eval expressions. +The value argument can be a field name or a value. + +Number is accepted as input to the function. + +When you use the tostring function with the eval command, the returned values might not sort as expected. Use the tostring function to format the displayed values. + +The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. + +Format Description +"binary" Converts a number to a binary value. +"hex" Converts the number to a hexadecimal value. +"commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places. +"duration" Converts the value in seconds to the readable time format HH:MM:SS. +Binary conversion +You can use this function to convert a number to a string of its binary representation. For example, the result of the following function is 1001, because the binary representation of 9 is 1001.: +eval result = tostring(9, "binary") + +For information about bitwise functions that you can use with the tostring function, see Bitwise functions. + +Basic examples +The following example returns "True 0xF 12,345.68". +... | eval n=tostring(1==1) + " " + tostring(15, "hex") + " " + tostring(12345.6789, "commas") +The following example returns foo=615 and foo2=00:10:15. The 615 seconds is converted into minutes and seconds. + +... | eval foo=615 | eval foo2 = tostring(foo, "duration") +The following example formats the column totalSales to display values with a currency symbol and commas. You must use a period between the currency value and the tostring function. + +### Related Issues +Resolves #4492 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/functions/string.rst:None` + + +clarify syntax grammar of tostring function, it should accept ANY valid data type right? It also should support option as second parameter. + + +### @penghuo on `docs/user/ppl/functions/string.rst:None` + + +Reformat doc. https://github.com/asifabashar/sql/blob/e4b11d9b405cb625b7cd898828b538b50bb0dded/docs/user/ppl/functions/string.rst#tostring + + +### @penghuo on `docs/user/ppl/functions/string.rst:None` + + +reformat doc. + + +### @penghuo on `docs/user/ppl/functions/string.rst:None` + + +Reformat doctest, follow example of other functions. + + +### @penghuo on `async-query-core/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +revert, is not related to opensearch-ppl + + +### @penghuo on `async-query-core/src/main/antlr/OpenSearchPPLParser.g4:None` + + +revert, is not related to opensearch-ppl + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:1219` + + +Is better to define as dataTypeFunctionCall, similar to cast, toInt, toXXX. + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:None` + + +is it possible to leverage sql cast function for this case? tostring(1=1) equal to cast (1=1) to string?
+ + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:None` + + +expectedPhysical -> expectedResult + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:None` + + +expectedPhysical -> expectedResult + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:None` + + +expectedPhysical -> expectedResult + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:276` + + +Does Calcite already support this? FORMAT_TIME? + + +### @penghuo on `language-grammar/src/main/antlr4/OpenSearchSQLParser.g4:None` + + +Ignore SQL. not related + + +### @penghuo on `language-grammar/src/main/antlr4/OpenSearchSQLLexer.g4:None` + + +Ignore SQL. not related + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/DSL.java:None` + + +Is this tostring function used? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java:37` + + +Add Unit Test. + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:276` + + +It has FORMAT_TIME, the intention was to provide compatible to [SPL in Splunk ](https://help.splunk.com/en/splunk-enterprise/search/spl-search-reference/9.2/evaluation-functions/conversion-functions#:~:text=tonumber(trim(celsius))-,tostring,-(%3Cvalue%3E%2C%3Cformat%3E)) + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:276` + + +It has FORMAT_TIME, in this case, the intention was to add similar compatible function with same name in [SPL](https://help.splunk.com/en/splunk-enterprise/search/spl-search-reference/9.2/evaluation-functions/conversion-functions#:~:text=two%20decimal%20places.-,tostring,-(%3Cvalue%3E%2C%22duration%22)).
Is it ok or I need to remove this functionality + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:None` + + +Tried following : "source=EMP | eval boolean_value1 = cast(1=1)| eval boolean_value2 = tostring(1==1) | fields boolean_value1, boolean_value2 |head 1" , following exeption as cast (1==1) or cast (1=1) are return syntax exception org.opensearch.sql.common.antlr.SyntaxCheckException: [)] is not a valid term at this part of the query: '...an_value1 = cast(1=1)' <-- HERE. Expecting one of 21 possible tokens. Some examples: 'AS', 'IN', 'NOT', 'OR', 'AND', + + +### @asifabashar on `core/src/main/java/org/opensearch/sql/expression/DSL.java:None` + + +Removed this. + + +### @asifabashar on `docs/user/ppl/functions/string.rst:None` + + +clarified document, as there are two formats in splunk Search Processing Language where option 1 is (number, fromatname) as parameter and option 2 is just boolean value. Clarified more on that syntax as is in SPL. 
+ + +### @asifabashar on `docs/user/ppl/functions/string.rst:None` + + +formatted doc, moved to conversion.rst + + +### @asifabashar on `docs/user/ppl/functions/string.rst:None` + + +formatted doc, moved to conversion.rst + + +### @asifabashar on `language-grammar/src/main/antlr4/OpenSearchSQLLexer.g4:None` + + +removed this change + + +### @asifabashar on `ppl/src/main/antlr/OpenSearchPPLParser.g4:1219` + + +moved from text function to dataTypeFunctionCall + + +### @asifabashar on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java:None` + + +made the change + + +### @asifabashar on `core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java:37` + + +added additional unit tests in ToStringFunctionTest.java + + +### @asifabashar on `docs/user/ppl/functions/string.rst:None` + + +reformatted + + +### @asifabashar on `docs/user/ppl/functions/string.rst:None` + + +moved to conversion.rst + + +### @asifabashar on `async-query-core/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +reverted + + +## General Comments + + +### @asifabashar + + +@penghuo Please review + + +### @asifabashar + + +@penghuo Thanks for your comments and review. I have made the changes, please check. + + +### @asifabashar + + +@penghuo please review + + +### @asifabashar + + +@penghuo please review + + +### @asifabashar + + +please review @penghuo + + +### @asifabashar + + +@penghuo please review + + +### @penghuo + + +@asifabashar Please resolve conflicts. + + +### @asifabashar + + +fixed merge conflicts + + +### @penghuo + + +> fixed merge conflicts + +@asifabashar +tostring docs missing after merge conflict, please double check. 
+ + +### @asifabashar + + +sorry for missing, I have added back tostring doc + + +### @asifabashar + + +@penghuo please review, fixed merge conflict + + +### @asifabashar + + +fixed doctest , removed EMP + + +--- + +# PR #4495: [Backport 2.19-dev] Fix missing keywordsCanBeId + +**URL:** https://github.com/opensearch-project/sql/pull/4495 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T05:12:52Z + +**State:** MERGED + +**Merged:** 2025-10-10T09:09:57Z + +**Changes:** +69 -5 (3 files) + + +## Description + +Backport 4954cab39e0240c93479e5aed02e9e01fe9108c7 from #4491. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4494: [Backport 2.19-dev] Fix the bug of explicit makeNullLiteral for UDT fields + +**URL:** https://github.com/opensearch-project/sql/pull/4494 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-10T05:12:29Z + +**State:** MERGED + +**Merged:** 2025-10-10T09:09:52Z + +**Changes:** +102 -0 (3 files) + + +## Description + +Backport 7d6357d820ad92128490bbd4f3a44bb2905d15d4 from #4475. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4491: Fix missing keywordsCanBeId + +**URL:** https://github.com/opensearch-project/sql/pull/4491 + +**Author:** @LantaoJin + +**Created:** 2025-10-10T03:13:25Z + +**State:** MERGED + +**Merged:** 2025-10-10T04:58:52Z + +**Changes:** +69 -5 (3 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Follow keywords was not added to `keywordsCanBeId`: +``` +as +on +limit +overwrite +field +sed +label +aggregation +brain +simple_pattern +max_match +offset_field +savedsearch (deleted, never used) +datamodel (deleted, never used) +to (deleted, never used) +millisecond (deleted, never used) +``` + +### Related Issues +Resolves #4481 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4489: Add JSON_EXTRACT_ALL internal function for Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4489 + +**Author:** @ykmr1224 + +**Created:** 2025-10-10T00:27:54Z + +**State:** MERGED + +**Merged:** 2025-10-14T15:36:44Z + +**Changes:** +956 -0 (6 files) + +**Labels:** `PPL`, `feature`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Add JSON_EXTRACT_ALL internal function for Calcite PPL + - This function will be used by `spath` command without `path` param implementation. 
+ - Extracts all the values from input JSON string, and store them into MAP with JSON path as the key. + - Example: input=`{"a": 1, "b": [2, 3, 4], "c": [{"e": 5}, {"e": 6}, {"f": 7}]}` output=`Map("a"=> 1, "b{}"=> [2,3,4], "c{}.e"=> [5, 6], "c{}.f"=> 7)` + - `{}` is a special notation to indicate array in the path. + - A value could be array or single value regardless the key ends with or contains `{}`. (see `c{}.e` and `c{}.f` in above example) + - Please check unit tests for detailed spec. +- We might extend it to allow specifying paths to collect only needed values for optimization. + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4433 +- https://github.com/opensearch-project/sql/issues/4112 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java:None` + + +Avoid exception, User parser.getNumberType? + + +### @penghuo on `core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java:None` + + +remove debug + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java:None` + + +Thanks, fixed! + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java:158` + + +Is this list copy intentional? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java:95` + + +Do we need to limit the depth of stack? 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java:95` + + +This does not use recursion and I don't see much risk to allow any depth. (It would raise exception when it fails to allocate memory, though) + +Let me know if you see some specific risk. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java:158` + + +The `else` clause is for when existingValue is not a list, and will create new list in that case. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4488: [Backport 2.19-dev] change Anonymizer to mask PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4488 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-09T23:17:21Z + +**State:** MERGED + +**Merged:** 2025-10-10T06:47:14Z + +**Changes:** +211 -161 (2 files) + + +## Description + +Backport c121fa8f7d18cb86f343884084093a456d85a844 from #4352. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4485: Update stalled action timeout + +**URL:** https://github.com/opensearch-project/sql/pull/4485 + +**Author:** @Swiddis + +**Created:** 2025-10-09T20:22:36Z + +**State:** MERGED + +**Merged:** 2025-10-14T03:23:11Z + +**Changes:** +3 -2 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Reduces the stalled timer to 14 days (one triage interval) and also skips marking draft PRs as stalled + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @RyanL1997 on `.github/workflows/stalled.yml:29` + + +nice. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4484: Publish internal modules separately for downstream reuse + +**URL:** https://github.com/opensearch-project/sql/pull/4484 + +**Author:** @Swiddis + +**Created:** 2025-10-09T19:17:37Z + +**State:** MERGED + +**Merged:** 2025-10-23T23:27:40Z + +**Changes:** +702 -0 (8 files) + +**Labels:** `enhancement`, `backport 3.3` + + +## Description + +### Description +Copy of #4385 but merged with main, trying to see what's broken with tests + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java:11` + + +Since CalciteRelNodeVisitor needs a data source service now, but this notion doesn't make any sense outside of a cluster, let's just fill in an empty one that always returns no results and reports any given source as nonexistent. + +Better solution long-term would be to make CalciteRelNodeVisitor not have this dependency to begin with, seems like it crosses boundaries. + + +### @dai-chen on `.github/workflows/maven-publish-modules.yml:None` + + +Is this already deprecated? 
If so, probably you need a PR for previous feature branch to verify the publish? + + +### @Swiddis on `.github/workflows/maven-publish-modules.yml:None` + + +agh, sonatype is the gift that keeps on giving... + +Interesting that tests pass when this shouldn't be able to build + + +### @dai-chen on `.github/workflows/maven-publish-modules.yml:None` + + +Follow other workflow using `- '[0-9]+.[0-9]+'`? + + +## General Comments + + +### @Swiddis + + +~Don't merge -- trying to test the publish flow on the feature branch currently, it says "success" but no artifact shows up in central~ + +Fixed, successful publish: https://github.com/opensearch-project/sql/actions/runs/18763687236/job/53534148205 + + +--- + +# PR #4483: [Backport 2.19-dev] Add mvappend function for Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4483 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-09T19:17:37Z + +**State:** MERGED + +**Merged:** 2025-10-10T16:25:43Z + +**Changes:** +607 -90 (11 files) + + +## Description + +Backport 5c784fea9efd6d46c10c5524d7582b76bca64a34 from #4438. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +blocked by https://github.com/opensearch-project/sql/pull/4488 + + +### @ykmr1224 + + +https://github.com/opensearch-project/sql/pull/4488 was merged and rebased this PR. + + +--- + +# PR #4480: [Backport 2.19-dev] Run settings doctests last and remove per-doc “rollback” sections + +**URL:** https://github.com/opensearch-project/sql/pull/4480 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-09T17:14:32Z + +**State:** MERGED + +**Merged:** 2025-10-09T18:34:00Z + +**Changes:** +12 -45 (3 files) + + +## Description + +Backport a57796f44f028fcf2b2c10535c8d1fdb36f5fc59 from #4449. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4479: Add data anonymizer for spath command + +**URL:** https://github.com/opensearch-project/sql/pull/4479 + +**Author:** @ykmr1224 + +**Created:** 2025-10-09T16:02:36Z + +**State:** MERGED + +**Merged:** 2025-10-10T16:29:32Z + +**Changes:** +29 -0 (3 files) + +**Labels:** `PPL`, `maintenance`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Add data anonymizer for spath command (just noticed it was missing) + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4477: Add MAP_CONCAT internal function + +**URL:** https://github.com/opensearch-project/sql/pull/4477 + +**Author:** @ykmr1224 + +**Created:** 2025-10-09T15:26:15Z + +**State:** MERGED + +**Merged:** 2025-10-13T19:04:34Z + +**Changes:** +183 -0 (3 files) + +**Labels:** `PPL`, `feature`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Add MAP_CONCAT internal function for dynamic fields implementation + - It will be used to merge dynamic fields map among inputs (mainly join), where same key will be overwritten by later input. 
+- It refers to the Spark library implementation +- Added integration test to verify its behavior and access to the function (manual RelNode generation to avoid exposing function in PPL syntax) + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4433 +- https://github.com/opensearch-project/sql/issues/4307 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - DISMISSED + + +Not sure. Do we need map_concat or map_zip_with? + +``` +spark-sql (default)> SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> array(v1, v2)) as res; +res +{1:["a","x"],2:["b","y"]} +``` + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +> Not sure. Do we need map_concat or map_zip_with? +> +> ``` +> spark-sql (default)> SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> array(v1, v2)) as res; +> res +> {1:["a","x"],2:["b","y"]} +> ``` + +That sounds more related to MAP_APPEND which I am planning to implement, and that will be used to merge dynamic fields when `spath` command is executed (because `spath` command needs to append instead of replace). +MAP_CONCAT is different because it will simply overwrite value in case of key conflict. This is needed mainly for join operation where we don't merge values.
+ + +--- + +# PR #4476: [Backport 2.19-dev] Fix join type ambiguous issue when specify the join type with sql-like join criteria + +**URL:** https://github.com/opensearch-project/sql/pull/4476 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-09T09:40:04Z + +**State:** MERGED + +**Merged:** 2025-10-09T10:13:52Z + +**Changes:** +23 -1 (2 files) + + +## Description + +Backport 3d2043d4d02d035b2cd41345823535545d4e1f16 from #4474. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4475: Fix the bug of explicit makeNullLiteral for UDT fields + +**URL:** https://github.com/opensearch-project/sql/pull/4475 + +**Author:** @songkant-aws + +**Created:** 2025-10-09T09:01:36Z + +**State:** MERGED + +**Merged:** 2025-10-10T04:59:21Z + +**Changes:** +102 -0 (3 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Fix the bug that Calcite will error out when CalciteRelBuilder explicitly calling `makeNullLiteral` for UDT type fields. + +UDT type cast is handled by UDF. It's totally fine for `CAST` function, regardless whether the field value is NULL or non NULL. But for some cases in building RelNode, it has the problem of explicitly casting NULL to UDT type by `RexBuilder.makeNullLiteral` method. UDT cast is a RexCall of calling UDF, but `makeNullLiteral` internally expects the returned node to be RexLiteral of NULL. Thus, Calcite throws a hard error due to Java class casting failure. + +This PR fixes this issue by adding a simple check and calling super method in case the RexNode to be cast is literally NULL. + +### Related Issues +Resolves #4383 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4383.yml:59` + + +will `source=log-test1 | append [ source=log-test2 ] | fields timestamp` return `- match: { datarows: [["2025-09-04 16:15:00"], [null]] }`? + + +### @songkant-aws on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4383.yml:59` + + +Synced offline. It will append unique name in case of type conflicts and select fields are also successful + + +## General Comments + + +### @songkant-aws + + +cc @ahkcs @yuancu Please let me know if this simple fix can cover the issue. + + +### @ahkcs + + +Thanks for the fix! I think for #4383, it's more like an issue for long-term goals for `multisearch` command(can be applied to `append` command as well), so this PR might only partially resolve the issue + + + +### @LantaoJin + + +> Thanks for the fix! I think for #4383, it's more like an issue for long-term goals for `multisearch` command(can be applied to `append` command as well), so this PR might only partially resolve the issue + +Is `multisearch` available? can you test that after this merged. + + +### @songkant-aws + + +@ahkcs Makes sense. It's a short-term bug fix to not fail the query. 4383 wants to resolve another issue. I can create another report bug issue to avoid confusion + + +### @songkant-aws + + +@LantaoJin Tried the `multisearch` command. It also fixes the same issue in `multisearch` command. 
+ + +--- + +# PR #4474: Fix join type ambiguous issue when specify the join type with sql-like join criteria + +**URL:** https://github.com/opensearch-project/sql/pull/4474 + +**Author:** @LantaoJin + +**Created:** 2025-10-09T08:18:12Z + +**State:** MERGED + +**Merged:** 2025-10-09T09:39:50Z + +**Changes:** +23 -1 (2 files) + +**Labels:** `bug`, `backport 2.19-dev`, `backport 3.3` + + +## Description + +### Description +Issue happens when use sql-like join criteria e.g. `on l.key=r.key` with specific join type argument `type=xxx`: +``` +| join type=left left=RATING right=PRODUCT ON RATING.product_id = PRODUCT.product_id +``` +throws `Join type is ambiguous, remove either the join type before JOIN keyword or ..` + +### Related Issues +Resolves #4459 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Should we backport this to 3.3 branch? 
@penghuo + + +--- + +# PR #4473: [Backport 2.19-dev] Support time modifiers in search command (#4224) + +**URL:** https://github.com/opensearch-project/sql/pull/4473 + +**Author:** @yuancu + +**Created:** 2025-10-09T06:07:48Z + +**State:** MERGED + +**Merged:** 2025-10-10T04:53:08Z + +**Changes:** +1707 -136 (34 files) + + +## Description + +## Description + +Backport #4224 to 2.19-dev + +## Commit Messages + +* Implement absolute time range in search command + + + +Unit test search with absolute time range + + + +Rephrase timeRange and timeModifier + + + +Switch to earliest and latest udf + + + +Add a convert util + + + +Verify time correctness during coversion + + + +Fix quarter parsing bugs + + + +Fix week snap parsing + + + +Remove old implementation that translates time modifier to time filter + + + +* Fix anomalyzed test & add a todo for an ignored test + + + +* Support now() in time range + + + +* Fix time modifier explain ITs + + + +* Support unixtimestamp (second) as a time modifier value + + + +* Update docs for search command with time modifiers + + + +* Test accessing fields with name earliest and latest in search command + + + +* Update doctest in condition.rst due to the update in the implementation of earliest and latest conditions + + + +* Update PPLQueryDataAnonymizerTest.java + + + +* Update explain ITs to use yaml plan files + + + +* Update a link to OpenSearch exists + + + +* Support using timesnaps without quotes + + + +* Add a unit test for direct format + +- additionally rename parseRelativeTime to resolveTimeModifier + + + +* Add support to ISO 8601 date format to time modifier, as it is now widely supported in PPL + + + +* Update syntax by reusing SPANLENGTH definition + + + +* Update explain IT for search with time modifier + + + +* Add integration tests for time modifiers + + + +* Parse timestamp string with multiple parsers in a loop + + + +* Remove opensearch test dependency from core module + + + +* Fix unit tests + + + +* Minor 
updates to explain limitations in search doc + + + +--------- + + +(cherry picked from commit e4685137de681a5f513a88c681b406ad5a786c27) + +### Related Issues + +#4135 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4472: [Backport 2.19-dev] [Doc] Enable doctest with Calcite (#4379) + +**URL:** https://github.com/opensearch-project/sql/pull/4472 + +**Author:** @LantaoJin + +**Created:** 2025-10-09T05:06:38Z + +**State:** MERGED + +**Merged:** 2025-10-09T05:40:43Z + +**Changes:** +236 -278 (8 files) + + +## Description + +(cherry picked from #4379 commit 9143a449f38bf08470ecc25826c474d8fd38d6c8) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4471: [Backport 2.19-dev] [DOC] Fix typo: Update eventstats.rst + +**URL:** https://github.com/opensearch-project/sql/pull/4471 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-09T03:34:29Z + +**State:** MERGED + +**Merged:** 2025-10-09T05:21:01Z + +**Changes:** +2 -2 (1 files) + + +## Description + +Backport d940e4edd557e84d397242ed51906bbb2d648a5f from #4447. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4470: [Backport 2.19-dev] Enable Calcite by default and implicit fallback the unsupported commands (#4372) + +**URL:** https://github.com/opensearch-project/sql/pull/4470 + +**Author:** @LantaoJin + +**Created:** 2025-10-09T03:14:37Z + +**State:** MERGED + +**Merged:** 2025-10-09T04:55:14Z + +**Changes:** +208 -171 (32 files) + + +## Description + +(cherry picked from #4372 commit b9e2761a744b839d0edd86213b13a2cd89322787) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4465: [Backport 2.19-dev] Refactor name resolution in Calcite PPL (#4393) + +**URL:** https://github.com/opensearch-project/sql/pull/4465 + +**Author:** @ykmr1224 + +**Created:** 2025-10-08T22:47:33Z + +**State:** MERGED + +**Merged:** 2025-10-09T03:26:24Z + +**Changes:** +536 -126 (8 files) + +**Labels:** `maintenance` + + +## Description + +Manual backport [9954652](https://github.com/ykmr1224/sql/commit/9954652bc66abf81a703b7267ba204b50fe4cb3c) from https://github.com/opensearch-project/sql/pull/4393 to 2.19-dev branch + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4464: Add `per_second` function support for `timechart` command + +**URL:** https://github.com/opensearch-project/sql/pull/4464 + +**Author:** @dai-chen + +**Created:** 2025-10-08T22:46:50Z + +**State:** MERGED + +**Merged:** 2025-10-13T19:52:33Z + +**Changes:** +622 -14 (17 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR adds 
`per_second()` support to the `timechart` command by implementing **Option 3 (Eval Transformation)**. As a short-term solution, it extends the `Timechart` AST to recognize the per_second function and rewrites into the equivalent math formula shown in the examples below. See issue #4350 for background and alternatives. + +#### Examples + +
    +-- Original
    +source=events
    +| timechart span=5m per_second(packets)
    +
    +-- Rewritten
    +source=events
    +| timechart span=5m sum(packets) as `per_second(packets)`
    +| eval `per_second(packets)` = `per_second(packets)` / 300   -- 5 minutes = 300 seconds
    +
    + +For spans whose length varies with the calendar (month/quarter/year), the number of seconds depends on actual start/end timestamps (e.g., September has 30 days; October has 31 days; February in a leap year has 29 days). To ensure correctness, the rewrite computes the exact bucket length dynamically: + +
    +-- Example: span=2mon
    +source=logs
    +| timechart span=2mon sum(packets) as `per_second(packets)`
    +| eval `per_second(packets)` = `per_second(packets)` /
    +    timestampdiff(
    +      SECOND,
    +      @timestamp,                                  -- span start
    +      timestampadd(MONTH, 2, @timestamp)           -- span end   (start + 2 months)
    +    )
    +
    + +### Related Issues +Resolves partially #4350. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Hi @dai-chen , thanks for the change. + + +## Review Comments + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:119` + + +What happens if the span evaluates to 0 seconds (e.g., with millisecond spans or edge cases)? Should there be validation or error handling for division by zero? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:81` + + +Just for my knowledge: This `attach()` cannot be called multiple times in a way that causes double-transformation right? + +I believe the current implementation is safe because `transformPerFunction() `checks if the aggregate function is a `per_*` function before transforming, and after the first transformation it becomes `sum()`. But still, I'm just trying to understand the logic better. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:None` + + +I think we want to prefer Java 11 syntax as backport will break it. + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java:111` + + +Can we add more integration tests here for queries like: +``` +source=events_traffic | timechart span=1M per_second(packets) +``` +to make it clear on how it handles variable month lengths (28-31 days) + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:None` + + +Yes, I tried to avoid new feature elsewhere. But this one is following the intervalUntiToSpanUnit above it. 
Let me change it back to old switch if concerned. Thanks! + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:119` + + +Good catch. Let me check if `timechart` has the validation or not. Thanks! + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java:111` + + +Sure, let me check if I can add more. Thanks! + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java:111` + + +Addressed in https://github.com/opensearch-project/sql/pull/4464/commits/995171e7da73d18e63d482f3cdc94241ddadd1ce. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:None` + + +Addressed in https://github.com/opensearch-project/sql/pull/4464/commits/995171e7da73d18e63d482f3cdc94241ddadd1ce. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:81` + + +Yes, because of the transformation, any further call (if any) will be no-op. Ideally I think we should have a new AST rewriter abstraction. However, because this is a short-term solution, I finally insert the logic here. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:119` + + +I quick tested `timechart` command and found negative span value is not allowed in our grammar. But zero-span validation is missing. Let me create an issue and raise separate PR to fix. + +
    +opensearchsql> source=test_data_2023 | timechart span=0m per_second(packets);
    +TransportError(500, 'SearchPhaseExecutionException', {'error': 
    +{'reason':'Error occurred in OpenSearch engine: all shards failed', 'details': 'Shard[0]:
    +  java.lang.IllegalArgumentException: Zero or negative time interval not supported\n\n
    +    For more details, please send request for Json format to see the raw response from OpenSearch engine.',
    +      'type': 'SearchPhaseExecutionException'}, 'status': 400})
    +
    +opensearchsql> source=test_data_2023 | timechart span=-1m per_second(packets);
    +{'reason': 'Invalid Query', 'details': "[-] is not a valid term at this part of the query: '...23
    +  | timechart span=-' <-- HERE. extraneous input '-' expecting {SPANLENGTH, INTEGER_LITERAL}",
    +    'type': 'SyntaxCheckException'}
    +
    + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:119` + + +Created issue: https://github.com/opensearch-project/sql/issues/4527. Thanks! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4462: Switch to Guice#createInjector and add concurrent SQL/PPL regression ITs + +**URL:** https://github.com/opensearch-project/sql/pull/4462 + +**Author:** @penghuo + +**Created:** 2025-10-08T21:30:33Z + +**State:** MERGED + +**Merged:** 2025-10-09T15:13:50Z + +**Changes:** +232 -2 (4 files) + +**Labels:** `bug`, `SQL` + +**Assignees:** @penghuo + + +## Description + +### Description +* replace ModulesBuilder#createInjector() with Guice.createInjector(...) in SQLPlugin and TransportPPLQueryAction so the SQLService and PPLService created are not singleton. +* add SQLConcurrencyIT and PPLConcurrencyIT that hammer COUNT/SUM against the shared Index.ACCOUNT data using fixed thread pools. + +### Related Issues +https://github.com/opensearch-project/sql/pull/4454 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4457: Add more examples to the `where` command doc + +**URL:** https://github.com/opensearch-project/sql/pull/4457 + +**Author:** @ahkcs + +**Created:** 2025-10-08T18:45:15Z + +**State:** MERGED + +**Merged:** 2025-10-14T21:28:53Z + +**Changes:** +134 -3 (1 files) + +**Labels:** `documentation`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description + +This PR significantly expands the `WHERE` command documentation to provide users with comprehensive examples covering a wide range of filtering scenarios in Piped Processing Language (PPL). + +### What Changed + +* **Enhanced document structure:** Improved formatting consistency for headings, sections, and syntax highlighting +* **Expanded examples:** Added **7 new practical examples** (previously 1 → now 8 total) illustrating diverse filtering use cases, including: + + * Basic field comparisons + * Pattern matching using the `LIKE` operator (underscore and percent wildcards) + * Multiple conditions with the `AND` operator + * The `IN` operator for matching multiple values + * NULL value checks using `ISNULL()` + * Complex conditions combining parentheses and logical operators + * The `NOT` operator for exclusion filtering + + + + +From #4227 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4456: Support Regex for replace eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4456 + +**Author:** @ahkcs + +**Created:** 2025-10-08T18:29:43Z + +**State:** MERGED + +**Merged:** 2025-10-13T16:23:21Z + +**Changes:** +256 -3 (4 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` 
+ + +## Description + +**Description** +Enhances the `replace()` function to support regular expressions, enabling advanced text manipulation in PPL queries. + +--- + +**Examples** + +Before (literal only): + +```sql +source=logs | eval cleaned = replace(message, "ERROR", "WARN") +``` + +After (full regex): + +```sql +# Remove digits +source=logs | eval cleaned = replace(message, '\d+', '') + +# Swap date format (MM/DD/YYYY → DD/MM/YYYY) +source=logs | eval reformatted = replace(date, '^(\d{2})/(\d{2})/(\d{4})$', '\2/\1/\3') + +# Extract domain from email +source=users | eval domain = replace(email, '.*@(.+)', '\1') +``` + + + +## Reviews + + +### @RyanL1997 - CHANGES_REQUESTED + + +Hi @ahkcs , thanks for the change. I just left a comment of re-using existing operator of regex replacement and you can take a look. If there is any questions, feel free to ask me. + + +### @RyanL1997 - COMMENTED + + +Hi @ahkcs , thanks for the update of the implementation and I just left some comments. + + +## Review Comments + + +### @RyanL1997 on `core/src/test/java/org/opensearch/sql/expression/function/udf/ReplaceFunctionTest.java:1` + + +Instead of creating the new UDF we should re-use the existing supported sql opperator. You can reference to my previous implementation at `sed` in `rex` that I was doing the same thing: +- https://github.com/opensearch-project/sql/pull/4241 + +For more details you can also reference to: https://github.com/opensearch-project/sql/pull/4109#discussion_r2305922467 +``` +REX_SED - Complete native Calcite optimization +REGEXP_REPLACE_PG_3 for basic substitution +REGEXP_REPLACE_PG_4 for flagged substitution (g, i) +REGEXP_REPLACE_5 for nth occurrence +TRANSLATE3 for transliteration (y/from/to/) +``` +As you can see there are various of options that available for regex replace and I think you can choose the one you need. 
+ + +### @RyanL1997 on `core/src/test/java/org/opensearch/sql/expression/function/udf/ReplaceFunctionTest.java:1` + + +More specifically, you may need to use this: +```java +BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3, +``` + + +### @ahkcs on `core/src/test/java/org/opensearch/sql/expression/function/udf/ReplaceFunctionTest.java:1` + + +Hi @RyanL1997 , thanks for the suggestion, I have updated the implementation to remove the custom UDF and leverage the existing sql operator + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +nice. + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLStringBuiltinFunctionIT.java:None` + + +lets remove the verbose comments + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Is this because the requirement requires java style? What is the expected behavior over here? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Because we use java regex internally but we want to support PCRE style syntax, so we add this conversion logic to translate PCRE syntax to java regex + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLStringBuiltinFunctionIT.java:None` + + +removed + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +non-blocking: I remember I do have the same logic for `sed`, maybe we can extract them into a certain util. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Do we need to do this in `visitFunction`? +Wondering if we can move this logic to the function side. +If that is difficult, let's extract it to an method + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +What happens if regex is invalid? 
Should we check validity and raise exception? + + +### @ykmr1224 on `docs/user/ppl/functions/string.rst:261` + + +Does it accept $0 to refer whole match? + +What happens when reference does not exist? (like when pattern define only 2 groups, but replacement refer $3) + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Moved from CalciteRexNodeVisitor to PPLFuncImpTable + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Example: + source=test | eval result=replace(field, "(invalid[", "\1") + ↑ + invalid regex + + What happens: + + 1. Our code transforms the replacement string: "\1" → "$1" (this always succeeds) + 2. Calcite builds the plan: REGEXP_REPLACE(field, "(invalid[", "$1") + 3. At execution time, Java's Pattern.compile("(invalid[") throws exception + +So I think we don't need to add additional error handling here + + +### @ahkcs on `docs/user/ppl/functions/string.rst:261` + + +>Does it accept $0 to refer whole match? + +Yes, Our conversion code transforms \0 → $0, and Java regex supports $0 to refer to the entire matched string + +>What happens when reference does not exist? (like when pattern define only 2 groups, but replacement refer $3) + +It will throw exception: ` IndexOutOfBoundsException: No group 3` + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +3 -> will that exception be translated to 4xx error and user can identify the issue? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Nice! 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Updated the implementation to have 4xx code for users to understand, +Updated error message: +``` +Debugging error message: method [POST], host [http://127.0.0.1:56271], URI [/_plugins/_ppl], status line [HTTP/1.1 400 Bad Request] +{ + "error": { + "reason": "Invalid Query", + "details": "java.util.regex.PatternSyntaxException: Unclosed character class near index 8\n[unclosed\n ^", + "type": "UncheckedExecutionException" + }, + "status": 400 +} +``` + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Hmm, adding PatternSyntaxException to client error could potentially hide our bugs. (Even if we make some mistake in somewhere and generate invalid pattern within our logic, the error would be reported as 400) +Can't we simply compile pattern once and raise IllegalArgumentException? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +It makes sense, updated the implementation to handle error in PPLFuncImpTable as well, new error message: +``` + Error message: method [POST], host [http://127.0.0.1:61357], URI [/_plugins/_ppl], status line [HTTP/1.1 400 Bad Request] +{ + "error": { + "reason": "Invalid Query", + "details": "Cannot resolve function: REPLACE, arguments: [STRING,STRING,STRING], caused by: Invalid regex pattern '[unclosed': Unclosed character class", + "type": "ExpressionEvaluationException" + }, + "status": 400 +} +``` + +Also added IT test for this + + +## General Comments + + +### @RyanL1997 + + +@ahkcs , also I was wondering for the usage of the regex pattern in this function, will it support captured name group? If that is the case, there are some known limitation of java regex library. see details at https://github.com/opensearch-project/sql/pull/4434#issuecomment-3383457347, and lets see if we need to call this out in the related docs. 
+ + +### @ahkcs + + +> @ahkcs , also I was wondering for the usage of the regex pattern in this function, will it support captured name group? If that is the case, there are some known limitation of java regex library. see details at [#4434 (comment)](https://github.com/opensearch-project/sql/pull/4434#issuecomment-3383457347), and lets say if we need to call this out in the related docs. + +@RyanL1997 , thanks for calling it out, named groups like (?.+) are used by the rex command, which does have the Java regex limitation you mentioned (no underscores allowed in group names). The replace() function avoids this limitation entirely by using numbered references. + + For example: + - Rex fails: rex field=email ".+@(?.+)" (underscore not allowed) + - Replace works: eval domain_name = replace(email, '.+@(.+)', '\1') (field name defined in eval, not in regex) + + So the Java named group limitations in #4434 don't apply to replace() function. The field naming is controlled by the eval command, bypassing Java identifier restrictions. + + + + + +### @RyanL1997 + + +> > @ahkcs , also I was wondering for the usage of the regex pattern in this function, will it support captured name group? If that is the case, there are some known limitation of java regex library. see details at [#4434 (comment)](https://github.com/opensearch-project/sql/pull/4434#issuecomment-3383457347), and lets say if we need to call this out in the related docs. +> +> @RyanL1997 , thanks for calling it out, named groups like (?.+) are used by the rex command, which does have the Java regex limitation you mentioned (no underscores allowed in group names). The replace() function avoids this limitation entirely by using numbered references. 
+> +> For example: +> +> * Rex fails: rex field=email ".+@(?.+)" (underscore not allowed) +> * Replace works: eval domain_name = replace(email, '.+@(.+)', '\1') (field name defined in eval, not in regex) +> +> So the Java named group limitations in #4434 don't apply to replace() function. The field naming is controlled by the eval command, bypassing Java identifier restrictions. + +Make sense, since in the case of this PR, the extraction should be done by `eval` so it won't triggered the java regex pattern limitation for named capture group. + + +--- + +# PR #4455: [Backport 2.19-dev] Add ignorePrometheus Flag for integTest and docTest (#4442) + +**URL:** https://github.com/opensearch-project/sql/pull/4455 + +**Author:** @penghuo + +**Created:** 2025-10-08T16:14:36Z + +**State:** MERGED + +**Merged:** 2025-10-08T21:44:54Z + +**Changes:** +109 -25 (7 files) + +**Labels:** `PPL` + + +## Description + +(cherry picked from commit 344af4aa85b0afccfd5903a325aa4423a1afd0e2) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4454: Remove shared mutable optimizer field that caused race condition + +**URL:** https://github.com/opensearch-project/sql/pull/4454 + +**Author:** @noCharger + +**Created:** 2025-10-08T13:05:01Z + +**State:** MERGED + +**Merged:** 2025-10-08T17:56:04Z + +**Changes:** +72 -19 (3 files) + +**Labels:** `bug` + + +## Description + +### Description + +Remove shared mutable `optimizer` field that caused race condition. This field was shared across all concurrent queries that using same Analyzer singleton. For example, + +``` + this.optimizer = optimizer; +``` + +Query A sets to optimizerA and Query B overwrite it, causing Query A to use Query B's optimizer with wrong LogicalPlan mappings. 
+ +This resulted in "can't evaluate on aggregator" exception https://github.com/opensearch-project/sql/blob/773066fdde249a6d9e6457e22ed74c42e01aceaf/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java#L81-L83 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4452: [AUTO] Increment version to 3.4.0-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/4452 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-08T00:09:57Z + +**State:** MERGED + +**Merged:** 2025-10-18T03:37:07Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `maintenance`, `v3.4.0` + + +## Description + +- Incremented version to **3.4.0-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @peterzhuamazon + + +* pending https://github.com/opensearch-project/geospatial/pull/802 + + +### @peterzhuamazon + + +* Pending https://github.com/opensearch-project/sql/pull/4588 + + +### @peterzhuamazon + + +Will just merge this since it is blocking https://github.com/opensearch-project/sql/pull/4588. +All changes will be there to take effect. + +Thanks. 
+ + +--- + +# PR #4451: Add replace command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4451 + +**Author:** @ahkcs + +**Created:** 2025-10-07T21:14:09Z + +**State:** MERGED + +**Merged:** 2025-10-16T18:21:02Z + +**Changes:** +1070 -1 (20 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +## Description + +Implement the `replace` command in PPL to replace text patterns in specified fields. This PR includes the grammar implementation and basic replacement functionality. This implementation reuses the existing replace core logic. Missing / new features for regex support will be added in a separate PR. + +Original PR: #4248 + +--- + +## Syntax + +``` + | replace '<pattern>' WITH '<replacement>' IN field_list +``` + +* **pattern**: The text pattern to search for (case-sensitive) +* **replacement**: The text to replace matches with +* **field_list**: Comma-separated list of fields to perform replacement in + The `replace` command modifies the specified fields in-place with the replaced text. 
+ +--- + +## Semantics + +### Expected Behavior + +* **Action**: Modifies specified fields in-place with replaced text values +* **Scope**: Operates only on the specified fields +* **Data Modification**: Specified fields are updated with replacement text +* **Case Sensitivity**: Text literal matching is case-sensitive +* **Pattern Type**: Only supports literal string patterns (wildcards or regex support added later) + +### Implementation Approach + +* Modifies specified fields in-place using string replacement +* Performs literal string replacement in the specified fields +* Other fields remain unchanged +* Validates the existence of fields and correctness of pattern / replacement parameters + +--- + +## Example Queries + +```sql +-- Replace in a single field +source=logs | replace 'error' WITH 'ERROR' IN message + +-- Replace in multiple fields +source=logs | replace 'USA' WITH 'United States' IN country, state + +-- Combine with other commands +source=logs + | where level = 'error' + | replace 'error' WITH 'ERROR' IN message + | sort @timestamp + +-- Replace and select specific fields +source=logs | replace 'error' WITH 'ERROR' IN message | fields message, level +``` + +--- + +## Output Schema + +For each field specified in the `IN` clause, the field is modified in-place: + +* **Specified fields**: modified with replacement text +* **Other fields**: remain unchanged + +**Example** +Input: + +``` +message = "error occurred", level = "error" +``` + +After `replace 'error' WITH 'ERROR' IN message, level` → + +``` +message = "ERROR occurred" +level = "ERROR" +``` + +--- + +## Resolves +#3975 + +--- + +## Check List + +* [x] New functionality includes tests +* [x] New functionality has documentation +* [x] Javadoc added for new components +* [x] User manual / user documentation updated +* [x] PPL command checklist confirmed +* [x] Companion API changes PR created (if applicable) +* [x] Commits are signed per DCO (`--signoff` or `-s`) +* [x] Public documentation / 
issue or PR created + +By submitting this pull request, I confirm that my contribution is made under the terms of the **Apache 2.0 license**. + + + +## Reviews + + +### @dai-chen - COMMENTED + + +A high level QQ: any existing command can be reused instead of duplicating same/similar logic? e.g., make this subset of what rex command can support or rewrite to string replace function? + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +suggestion: We should annotate these with `@NotNull`. + + +### @Swiddis on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +suggestion: Should we have a global method for turning this into a LinkedHashSet? + +This is currently checked in `validate()`, but I don't think this is the only command to rely on a fieldList. If we make Replace take a Set instead of a List, we simplify the validation logic there, and we can push the responsibility of deduplicating fieldLists to shared parsing code. + + +### @Swiddis on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java:None` + + +nit: Catch a more specific Exception + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Changed to `@Nullable`, received similar comment before: https://github.com/opensearch-project/sql/pull/3878#discussion_r2297019972 + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Updated to accept Set instead of List, removing duplicate validation logic + + +### @ahkcs on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java:None` + + +updated to be more specific + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Fwiw, I also prefer using optional in fields, despite the warning. I'm not sure why that warning exists. 
:shrug: + +But why mark it nullable if we validate that they can't be null? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +We marked child as @Nullable, which we didn't include in the validate logic + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Is this used? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Not sure why we need this list. Looks like it becomes the copy of fieldNames. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Let's extract this. + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Will we enable multiple patterns like `REPLACE a WITH a1, b with b1 IN ...` + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Removed + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Updated + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Extracted + + +### @ahkcs on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +That's a good point! I have updated the grammar. The grammar now supports comma-separated replacement pairs, also added test cases + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Let's delete instead of deprecate. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Let's delete instead of deprecate. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Is this used? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Is all or most of these validation already guaranteed by ANTLR grammar? 
+ + +### @dai-chen on `docs/user/ppl/cmd/replace.rst:None` + + +This example is not necessary as all examples have IN clause? +Meanwhile there is no example for multiple replace pairs? + + +### @dai-chen on `docs/user/ppl/cmd/replace.rst:16` + + +Shall we mention version since? I forgot what's the standard way we proposed.. @ritvibhatt + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +visitField behind line 2424 already cover this? + + +### @ahkcs on `docs/user/ppl/cmd/replace.rst:16` + + +I think for the current proposal, we don't need to add version info +Reference: https://github.com/opensearch-project/sql/issues/4220#issuecomment-3353061607 + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + + Without the explicit validation logic, the invalid field would be silently ignored - no error, no replacement + The validation logic is used to throw clear error message when users specify invalid fields + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Removed redundant validation + + +### @ahkcs on `docs/user/ppl/cmd/replace.rst:None` + + +That's a good point! Removed unnecessary example and added example for multiple replace pairs + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +I'm a little confused. You mean each command needs to validate field by itself. 
+Here is the `relbuilder.field()` source code I'm looking at: + +``` + public RexInputRef field(int inputCount, int inputOrdinal, String fieldName) { + final Frame frame = peek_(inputCount, inputOrdinal); + final List fieldNames = Pair.left(frame.fields()); + int i = fieldNames.indexOf(fieldName); + if (i >= 0) { + return field(inputCount, inputOrdinal, i); + } else { + throw new IllegalArgumentException("field [" + fieldName + + "] not found; input fields are: " + fieldNames); + } + } +``` + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +In the implementation, we iterate over fieldNames (the actual schema fields) and only call field() for fields that exist, relBuilder.field() never gets called with invalid fields - so they're silently skipped. + +I think it's a good catch that you're suggesting we should leverage relBuilder.field()'s built-in validation instead of duplicating it. I have updated the logic to remove this validation logic and use the build-in validation instead by iterating all the fields instead of the actual schema fields. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/ast/tree/Replace.java:None` + + +Updated + + +## General Comments + + +### @ahkcs + + +I updated the implementation to change from creating a new column `new_*` to in-place replacement for fields + + +### @ahkcs + + +> A high level QQ: any existing command can be reused instead of duplicating same/similar logic? e.g., make this subset of what rex command can support or rewrite to string replace function? + +Thanks for the suggestion @dai-chen! 
For our replace command, we are not adopting regex support so our existing logic(such as replace eval function, rex sed mode) does not work here + + +--- + +# PR #4450: Time Unit Unification for bin/stats + +**URL:** https://github.com/opensearch-project/sql/pull/4450 + +**Author:** @ahkcs + +**Created:** 2025-10-07T21:08:07Z + +**State:** MERGED + +**Merged:** 2025-12-10T20:00:18Z + +**Changes:** +131 -55 (5 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Implements unified time unit support across PPL commands with case-sensitive handling. + + Key Changes: + - Fixed case sensitivity for `bin` command: Uppercase M = month, lowercase m = minute + - Added subsecond test cases to bin command: us, cs, ds + - Documented limitations: stats command cannot support subsecond units (OpenSearch calendar intervals) + +### Related Issues +Resolves #4397 + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Shall we have single place for span parsing logic (currently in `AstExpressionBuilder.visitSpan`?) instead of trying to parse span string everywhere? Any difference between span in bin options and span elsewhere? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +> Shall we have single place for span parsing logic (currently in `AstExpressionBuilder.visitSpan`?) instead of trying to parse span string everywhere? Any difference between span in bin options and span elsewhere? + +Thanks @dai-chen for the suggestion, however bin span is indeed different with the span elsewhere +For Span Types: + - stats/timechart: Numeric + Time only + - bin: Numeric + Time + Logarithmic (log2, log10, 2.5log3) + + I think it's still necessary to separate the implementation of bin span and other span + + + +### @dai-chen + + +> > Shall we have single place for span parsing logic (currently in `AstExpressionBuilder.visitSpan`?) 
instead of trying to parse span string everywhere? Any difference between span in bin options and span elsewhere? +> +> Thanks @dai-chen for the suggestion, however bin span is indeed different with the span elsewhere For Span Types: +> +> * stats/timechart: Numeric + Time only +> * bin: Numeric + Time + Logarithmic (log2, log10, 2.5log3) +> +> I think it's still necessary to separate the implementation of bin span and other span + +I see. We can consider how to extract the common logic later. + + +--- + +# PR #4449: Run settings doctests last and remove per-doc “rollback” sections + +**URL:** https://github.com/opensearch-project/sql/pull/4449 + +**Author:** @penghuo + +**Created:** 2025-10-07T20:57:26Z + +**State:** MERGED + +**Merged:** 2025-10-09T17:14:17Z + +**Changes:** +12 -45 (3 files) + +**Labels:** `documentation`, `PPL`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +1. remove rollback-to-default examples from docs/user/ppl/admin/settings.rst to match current guidance +2. move user/ppl/admin/settings.rst into a dedicated bash_settings category for doctest selection +3. keep random ordering for other suites while ensuring the settings suite runs last in doctest/test_docs.py + +### Related Issues +https://github.com/opensearch-project/sql/issues/4441 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4447: [DOC] Fix typo: Update eventstats.rst + +**URL:** https://github.com/opensearch-project/sql/pull/4447 + +**Author:** @alexey-temnikov + +**Created:** 2025-10-07T15:56:09Z + +**State:** MERGED + +**Merged:** 2025-10-07T16:33:29Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `documentation`, `PPL`, `backport 2.19-dev` + + +## Description + +Fixed typo: evenstats --> eventstats + +### Description +Fixed typo: evenstats --> eventstats + +### Related Issues +N/A + + +### Check List + - Doc Issue only + - Doc Issue only + - Doc Issue only + - Doc Issue only + - Doc Issue only +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4446: Support format=yaml in Explain API + +**URL:** https://github.com/opensearch-project/sql/pull/4446 + +**Author:** @penghuo + +**Created:** 2025-10-06T23:39:59Z + +**State:** MERGED + +**Merged:** 2025-10-20T20:02:34Z + +**Changes:** +401 -184 (17 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +* Add the YAML explain format responses. It more human readable. +* Refresh user documentation. 
[Example 4 YAML format (experimental)](https://github.com/penghuo/os-sql/blob/9ffe777e996b621896426cf00fb4dc3193b8aab1/docs/user/ppl/interfaces/endpoint.rst +). Marked it as experimental, the reasons is YAML format is more for developer to debug and test. The response may change. + +``` +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5]) + LogicalFilter(condition=[>($5, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["name","country","state","month","year","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) +``` + +### Related Issues +https://github.com/opensearch-project/sql/issues/4351 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:None` + + +issue: We should handle queries that have multiple DSL components (e.g. 
joins) + +A straightforward way is to number them "dsl1", "dsl2", etc and fill in references appropriately + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:59` + + +suggestion (non-blocking): Should we move the explain functionality to a dedicated location? + +The class is somewhat large as-is, and I don't like the idea of adding more complexity to OpenSearchExecutionEngine. + + +### @penghuo on `docs/category.json:6` + + +Test PPL with calcite + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:59` + + +Good point! + +I like your [PR](https://github.com/opensearch-project/sql-cli/pull/37). Added the YAML format to mimic that. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:None` + + +Done. + + +### @penghuo on `docs/user/ppl/interfaces/endpoint.rst:87` + + +Refresh doctest will correct response. Not breaking change. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:43` + + +Leverage Yaml response format. +Deprecated assertYamlEqualsJsonIgnoreId function. @ykmr1224 Please take a look + + +### @penghuo on `protocol/src/main/java/org/opensearch/sql/protocol/response/format/Format.java:48` + + +These are not response FORMAT. 
We should deprecated these in 4.0 and suggest user using `explain statement` + + +## General Comments + + +### @RyanL1997 + + +may need to manual backport it + + +### @penghuo + + +Manual backport https://github.com/opensearch-project/sql/pull/4609 + + +--- + +# PR #4443: [Backport 2.19-dev] PPL `fillnull` command enhancement (#4421) + +**URL:** https://github.com/opensearch-project/sql/pull/4443 + +**Author:** @ahkcs + +**Created:** 2025-10-06T21:24:18Z + +**State:** MERGED + +**Merged:** 2025-10-07T16:33:52Z + +**Changes:** +485 -25 (16 files) + + +## Description + +(cherry picked from commit e1a1bd88dfd92d9fe4c954387583c52c0bf6c29b) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4442: Add ignorePrometheus Flag for integTest and docTest + +**URL:** https://github.com/opensearch-project/sql/pull/4442 + +**Author:** @penghuo + +**Created:** 2025-10-06T20:38:46Z + +**State:** MERGED + +**Merged:** 2025-10-06T22:03:53Z + +**Changes:** +55 -25 (7 files) + +**Labels:** `documentation`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +* Added an ignorePrometheus flag to integration and doctest Gradle tasks so Prometheus startup and Prometheus-dependent tests are skipped whenever the property is present (unless explicitly set to false). +* Adjusted doctest harness to drop Prometheus-specific documentation cases when the flag is active. +* Updated developer docs (integration tests, doctest guide, Prometheus datasource section, describe command) to cover the new flag and cross-reference Prometheus metadata guidance. 
+ +### Testing +``` +./gradlew doctest -DignorePrometheus +./gradlew integTest -DignorePrometheus +``` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +Backport PR, https://github.com/opensearch-project/sql/pull/4455 + + +--- + +# PR #4440: Support eval returns decimal division result instead of integer + +**URL:** https://github.com/opensearch-project/sql/pull/4440 + +**Author:** @penghuo + +**Created:** 2025-10-06T16:57:54Z + +**State:** MERGED + +**Merged:** 2025-10-10T16:24:38Z + +**Changes:** +201 -60 (6 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +* Divide. Integer operands follow the legacy truncating result when [plugins.ppl.syntax.legacy.preferred](https://github.com/penghuo/os-sql/blob/39aacf495a5993dcae42d27bd3832a4165eaa169/docs/user/ppl/admin/settings.rst) is true (default). When the setting is false the operands are promoted to floating point, preserving the fractional part. Division by zero still returns NULL. +* Doc updated + * [plugins-ppl-syntax-legacy-preferred settings](https://github.com/penghuo/os-sql/blob/39aacf495a5993dcae42d27bd3832a4165eaa169/docs/user/ppl/admin/settings.rst#). 
+ * [Expressions](https://github.com/penghuo/os-sql/blob/39aacf495a5993dcae42d27bd3832a4165eaa169/docs/user/ppl/functions/expressions.rst) + +### Related Issues +https://github.com/opensearch-project/sql/issues/3946 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +Thanks for the change! With this change, I think the old implementation `DivideFunction` can be removed. + + +## Review Comments + + +### @penghuo on `docs/user/ppl/admin/settings.rst:231` + + +To be removed. https://github.com/opensearch-project/sql/issues/4441 + + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:93` + + +Wrap with CalcitePlanContext.run, No code logic change. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:133` + + +Wrap with CalcitePlanContext.run, No code logic change. + + +### @penghuo on `docs/user/ppl/admin/settings.rst:231` + + +PR https://github.com/opensearch-project/sql/pull/4449 + + +## General Comments + + +### @penghuo + + +@yuancu @LantaoJin Please take a look. + + +--- + +# PR #4438: Add mvappend function for Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4438 + +**Author:** @ykmr1224 + +**Created:** 2025-10-04T00:50:01Z + +**State:** MERGED + +**Merged:** 2025-10-09T19:16:46Z + +**Changes:** +607 -90 (11 files) + +**Labels:** `PPL`, `feature`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Added `mvappend(value1, value2, ...)` that combines all arguments into a single array + - Each argument could be an value, an array, or null. 
+- It is needed to implement `spath` command without `path` parameter so it can merge extracted fields with existing fields. + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4433 +- https://github.com/opensearch-project/sql/issues/4307 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ahkcs on `docs/user/ppl/functions/collection.rst:None` + + +I think we can remove the mentioning of version based on the new doc template +https://github.com/opensearch-project/sql/issues/4220#issuecomment-3353061607 +@ritvibhatt + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java:127` + + +Can we change these test cases to use `verifyDataRows`? Test result should be stable after testing on my local + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java:107` + + +ditto + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java:198` + + +ditto + + +### @ahkcs on `docs/user/ppl/functions/collection.rst:303` + + +Can we add more doctests for other type coercion such as Integer and double? + + +### @penghuo on `docs/user/ppl/functions/collection.rst:225` + + +In example, add following use cases +1. Handle duplicate values. mvappend(1, 1) +2. Handle null values. mvappend(1, null) +3. Nested use case, mvappend(1, mvappend(2, 3)) + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java:None` + + +3 questions +1. Could you explain the mostGeneralType rule? +2. 
if (hasStringType && hasNumericType) it should return ANY? +3. can we leverage typeFactory.leastRestrictive(List.of(...))? + + + +### @penghuo on `docs/user/ppl/functions/collection.rst:219` + + +What is difference with ARRAY? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java:None` + + +Not sure this code path been used or not. If yes, it will throw exception + +class [I cannot be cast to class [Ljava.lang.Object; ([I and [Ljava.lang.Object; are in module java.base of loader 'bootstrap') +java.lang.ClassCastException: class [I cannot be cast to class [Ljava.lang.Object; ([I and [Ljava.lang.Object; are in module java.base of loader 'bootstrap') + +for example + +``` + @Test + public void test() { + Object arg = new int[] {1, 2, 3}; + List objects = new ArrayList<>(); + + if (arg.getClass().isArray()) { + addArrayElements((Object[]) arg, objects); + } + } + + private static void addArrayElements(Object[] array, List elements) { + for (Object item : array) { + if (item != null) { + elements.add(item); + } + } + } +``` + + +### @ykmr1224 on `docs/user/ppl/functions/collection.rst:225` + + +Added more examples. A bit unsure if we should put so many examples... + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java:None` + + +Fixed the logic to simply return ANY when any incompatible type is detected. typeFactory.leastRestrictive does not do much job for incompatible types (It only works between structs) + + +### @ykmr1224 on `docs/user/ppl/functions/collection.rst:219` + + +ARRAY cannot take array as input. It cannot handle null as well. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java:None` + + +Thanks, it is fixed. 
+ + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java:None` + + +nit: Does AbstractTypeCoercion.getTightestCommonType() work? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java:None` + + +The basic idea of `AbstractTypeCoercion.getTightestCommonType()` is returning widened type with no precision loss, and works only for nullability difference and number precision difference. It is mainly used for binary operator (or function) to align the input types to execute operation, in my understanding. + + +### @dai-chen on `docs/user/ppl/functions/collection.rst:297` + + +This is the main reason to add a UDF rather than reusing `array_append`? + + +### @ykmr1224 on `docs/user/ppl/functions/collection.rst:297` + + +Main motivation is to handle null/value/array seamlessly, so that value merge operation won't become too complicated. + + +## General Comments + + +### @ahkcs + + +> * Each argument could be an value, an array, or null. + +How does the current logic handle primitive arrays? +I tried this test case in `MVAppendFunctionImplTest.java` and it returns `ClassCastException` error + +``` + @Test + public void testMvappendWithPrimitiveIntArray() { + int[] primitiveArray = {1, 2, 3}; + Object result = MVAppendFunctionImpl.mvappend(primitiveArray, 4); + assertEquals(Arrays.asList(1, 2, 3, 4), result); + } +``` + + +### @ahkcs + + +How does the current implementation handle type coercion? 
+I tested this in `CalciteMVAppendFunctionIT.java` and it passes: +``` +@Test + public void testMvappendWithIntegerAndDouble() throws IOException { + JSONObject actual = + executeQuery( + source( + TEST_INDEX_BANK, + "eval result = mvappend(1, 2.5, 3, 4.7) | head 1 | fields result")); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(0, 2.5, 0, 4.7))); + } + ``` + +Is it expected the return 0 for integer value when the array has both integer and double? Or do we need some additional handling logic here + + +### @ykmr1224 + + +Fixed logic to simply return ANY in case of mixed types. This spec might be debatable and welcome your opinion, but the idea is to leave users to cast to their preferred type. (Anyway, `spath` would extract values as `ANY` from schema perspective.) + + +--- + +# PR #4436: [Backport 3.3] reverting to _doc + _id + +**URL:** https://github.com/opensearch-project/sql/pull/4436 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-03T18:51:45Z + +**State:** MERGED + +**Merged:** 2025-10-03T20:15:39Z + +**Changes:** +23 -13 (3 files) + +**Labels:** `enhancement`, `v3.3.0` + + +## Description + +Backport 416a32752706f22964ffb43bb138166efa29a3d4 from #4435. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4435: reverting to _doc + _id + +**URL:** https://github.com/opensearch-project/sql/pull/4435 + +**Author:** @ahkcs + +**Created:** 2025-10-03T18:21:14Z + +**State:** MERGED + +**Merged:** 2025-10-03T18:51:31Z + +**Changes:** +23 -13 (3 files) + +**Labels:** `enhancement`, `PPL`, `backport 3.3` + + +## Description + +### Description +Reverting the sort key back to _doc + _id + +### Related PR +#4378 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4434: [Enhancement] Error handling for illegal character usage in java regex named capture group + +**URL:** https://github.com/opensearch-project/sql/pull/4434 + +**Author:** @RyanL1997 + +**Created:** 2025-10-03T18:07:20Z + +**State:** MERGED + +**Merged:** 2025-10-14T17:46:02Z + +**Changes:** +285 -24 (6 files) + +**Labels:** `enhancement`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +[Enhancement] Error handling for underscore usage in rex regex + +- Better handling for invalid named capture group +- Updated the doc +- Add ITs + +### Related Issue +* Resolve #3944 +* Resolve https://github.com/opensearch-project/sql/issues/4467 +* Relate to all the named capture group extraction related command (`parse`/`rex`/`replace`...) + +### Behavior after enhancement +```bash +# rex command +curl -X POST "localhost:9200/_plugins/_ppl" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "source=accounts | rex field=email \".+@(?<domain_name>.+)\" | fields email, domain_name" +}' + +{ + "error": { + "reason": "Invalid Query", + "details": "Invalid capture group name 'domain_name'. 
Java regex group names must start with a letter and contain only letters and digits.", + "type": "IllegalArgumentException" + }, + "status": 400 +} + + +# parse command +curl -X POST "localhost:9200/_plugins/_ppl" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "source=accounts | parse email \".+@(?<domain-name>.+)\" | fields email, domain-name | head 5" +}' + +{ + "error": { + "reason": "Invalid Query", + "details": "Invalid capture group name 'domain-name'. Java regex group names must start with a letter and contain only letters and digits.", + "type": "IllegalArgumentException" + }, + "status": 400 +}% +``` + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - CHANGES_REQUESTED + + +I think same as issue https://github.com/opensearch-project/sql/issues/3944 +If yes, add a yamlRestTest to verify the issue is fixed. + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +This is fine for now because we only use it in one place, but I'm concerned if any functionality comes up that uses this, they might not know they have to validate. It's easy to miss. + +Ideally we should have the validation happen directly as part of getting named group candidates, or even just add a second path for if the main group find returns 0, we check against the underscore version and raise the exception then. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +That's a good idea. Applied the improvement. 
+ + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +Simply use a regex? + +``` +return groupName.matches("^[A-Za-z][A-Za-z0-9_]*$"); +``` + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +fixed, forgot 1 commit 🫠 + + +## General Comments + + +### @RyanL1997 + + +> I think same as issue https://github.com/opensearch-project/sql/issues/3944 +If yes, add a yamlRestTest to verify the issue is fixed. + +Hi @penghuo, good callout. I actually took a look at the `-` case, and it turns out that is another limitation of the Java regex library. So I rechecked, and in summary: + +### Java Regex Named Group Character Rules + +According to Java's Pattern class documentation, named capture groups follow these rules: + +#### Valid Characters: + +- Must start with: Letter (a-z, A-Z) +- Can contain: Letters (a-z, A-Z) and digits (0-9) + +#### Invalid Characters (will cause PatternSyntaxException): + +- Underscore (_) - This is what we're handling with our validation +- Hyphen/Dash (-) - This is what you tested with curl +- Period/Dot (.) +- Space ( ) +- Special characters (@, #, $, %, ^, &, *, etc.) +- Unicode characters beyond ASCII +- Cannot start with digit (0-9) + +### Java Documentation: + +#### The official Java documentation for this is in: +- Java Pattern class: java.util.regex.Pattern +- Specific section: "Named-capturing group" in the Pattern syntax documentation +- Rule: Named groups use the syntax (?<name>X) where name must be a valid Java identifier but with more restrictions + +#### The exact Java rule is: + +``` + // From Java's Pattern class source code: + // Named group name must match: [a-zA-Z][a-zA-Z0-9]* +``` + +#### Quick Test Examples: + +``` +# These will FAIL in Java regex: +(?...) # underscore +(?...) # hyphen +(?...) # dot +(?...) # space +(?<@domain>...) # special char +(?<1domain>...) # starts with digit + +# These will WORK in Java regex: +(?...) 
# letters only +(?...) # camelCase +(?...) # letters + numbers +(?...) # uppercase +``` + + +### @RyanL1997 + + +According to the above, since the limitation is coming from the upstream library we are using, I just unified the error msg to handle all the illegal character cases in extraction commands like `rex` and `parse` and for the above `parse` related issue I also left a comment of work around by using `rename` command: https://github.com/opensearch-project/sql/issues/3944#issuecomment-3383678537. + +cc @penghuo + + +### @ykmr1224 + + +Technically we should be able to support the invalid characters by rewriting regex and map extracted values back to original name like: + +1. original pattern: `.+@(?.+)` +2. rewritten pattern: `.+@(?.+)` (maintain mapping `domainname` -> `domain_name` internally) +3. assign extracted value: domain_name = extracted['domainname'] + +This could cause name overwrap, but we can workaround by adding suffix, etc. to come up with unique name. + + +### @RyanL1997 + + +Hi @ykmr1224 , thanks for the input. And that was an interesting approach and I was actually did a PoC of your suggestion on my local. However, I do find some limitations of this approach: +- If the renaming of the pattern is just removing the unsupported character, what if the renamed pattern after removal is duplicate with a existing field? (e.g. the index field contains a `username`, user would like to extract the alias of the email (`xyz@email.com` -> `xyz`) as a column called `user_name` and in the above logic it will be renamed as `username` which is conflict with the index field. +- Stack traces and error messages refer to rewritten names (e.g. in `explain`) , which I think will cause the confusions for users. 
+- I haven't tried it on any of the benchmarking datasets yet for the re-writing approach, but in terms of time complexity: + - Current approach: `O(1)` per validation + - Rewriting approach: `O(k)` where k=number of named groups (for mapping tables as we are re-writing the column names) + +According to the above, I think we can keep the current limitation-handling approach for now; if feature requests come from users, we can think about actually supporting these characters as a new feature. But if that day comes, I truly think the correct approach is to find a PCRE style library for regex patterns. + + +### @ykmr1224 + + +@RyanL1997 +Thanks for PoCing. +In case of name overlap, we can add a suffix like `username1`, and increment in case it still overlaps. (it will eventually find a unique name) +i.e: `(?<user_name>.+)(?<username>.+)(?<username1>.+)` => `(?<username2>.+)(?<username>.+)(?<username1>.+)`, `mapping = {username2 => user_name, username => username, username1 => username1}` + +Computation cost for regex rewrite should be negligible since this happens only once during query analysis. And rename after extraction should be just an alias or projection (should not affect performance in my understanding). + +It can be a separate task from this PR, btw. +And I think we want to quickly check the possible usage of this use case to see the priority. + + +### @RyanL1997 + + +> It can be a separate task from this PR, btw. +And I think we want to quickly check the possible usage of this use case to see the priority. + +Transferring some internal communication over here - since the above issue is not blocking this change. This change is ready to be merged and I have created an issue https://github.com/opensearch-project/sql/issues/4549 to explore the feasibility of the above suggestion. 
+ + +--- + +# PR #4431: [Backport 2.19-dev] Support `multisearch` command in calcite (#4332) + +**URL:** https://github.com/opensearch-project/sql/pull/4431 + +**Author:** @ahkcs + +**Created:** 2025-10-02T18:13:36Z + +**State:** MERGED + +**Merged:** 2025-10-03T16:53:17Z + +**Changes:** +1891 -69 (32 files) + + +## Description + +(cherry picked from commit ca5a5bd6ee36ecdbbb2d3dc7a4b73ba28ecafa95) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4425: [Backport 2.19-dev] Add max/min eval functions (#4333) + +**URL:** https://github.com/opensearch-project/sql/pull/4425 + +**Author:** @ritvibhatt + +**Created:** 2025-10-02T06:36:58Z + +**State:** MERGED + +**Merged:** 2025-10-07T23:41:30Z + +**Changes:** +622 -1 (16 files) + + +## Description + +(cherry picked from commit fae06873a057f3bcdea1a7a113c74932fc801deb) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4423: [Backport main] Add 3.3 release notes + +**URL:** https://github.com/opensearch-project/sql/pull/4423 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-10-01T18:00:40Z + +**State:** MERGED + +**Merged:** 2025-10-01T18:09:12Z + +**Changes:** +86 -0 (1 files) + +**Labels:** `skip-changelog` + + +## Description + +Backport ae5eb515614043e81ebdbf031e1b805dca0fc449 from #4422. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4422: Add 3.3 release notes + +**URL:** https://github.com/opensearch-project/sql/pull/4422 + +**Author:** @Swiddis + +**Created:** 2025-10-01T16:34:34Z + +**State:** MERGED + +**Merged:** 2025-10-01T18:00:24Z + +**Changes:** +86 -0 (1 files) + +**Labels:** `backport main`, `skip-changelog` + + +## Description + +### Description +Release notes + +### Related Issues +#4037 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +I recall there is a workflow to raise release PR automatically. Could you confirm if this is required and won't conflict with that? Thanks. 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Swiddis + + +@ahkcs Bin command ITs are flaky: https://github.com/opensearch-project/sql/actions/runs/18169061498/job/51718900541?pr=4422 + + +--- + +# PR #4421: PPL `fillnull` command enhancement + +**URL:** https://github.com/opensearch-project/sql/pull/4421 + +**Author:** @ahkcs + +**Created:** 2025-09-30T23:02:52Z + +**State:** MERGED + +**Merged:** 2025-10-06T18:10:53Z + +**Changes:** +484 -21 (16 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### What does this PR do? + +Adds an alternative, option-style form for `fillnull` (available **since 3.4**) alongside existing `with/using` syntax. Semantics remain **strict and schema-stable** (no permissive casting in this PR). + +**New syntax (option-style):** + +```ppl +source=accounts | fillnull value=0 // apply to all fields (subject to type constraints) +source=accounts | fillnull value=0 age balance // apply to specific fields +``` + +**Existing syntax (still supported):** + +```ppl +source=accounts | fillnull with 0 in age, balance +source=accounts | fillnull using age=0, balance=0 +``` + +--- + +### Notes / Limitations (intentional in this PR) + +* `value` is **mandatory** (no default value yet). +* Mixed-type fills with a single replacement still error by design (strict typing preserved). +* Semantics remain strict and schema-preserving + +--- + +### Future Work + +* Track and implement an **opt-in permissive/schemaless mode** for mixed-type scenarios (follow-up: **#4349**). + + +### Related Issues +Partially resolves #4419 + +### Check List + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Thanks for the change @ahkcs . I just left some comments. + + +### @dai-chen - APPROVED + + +Thanks for the changes! 
+ + +## Review Comments + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java:None` + + +why all comment out? + + +### @dai-chen on `docs/user/ppl/cmd/fillnull.rst:None` + + +I didn't find validation code and test. What will happen if users provide replacement value of different type? + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Looks like this test case is not for `fillnull`. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Same as above. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Same as above. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +I don't see much value to test with multiple aggregate functions. + +And should we add tests for new syntax? + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +I think it does not belong to AGGREGATIONS. + + +### @ykmr1224 on `docs/user/ppl/cmd/fillnull.rst:None` + + +This example does not demonstrate the behavior (no field is filled) + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:294` + + +Why does it anonymize to different syntax? 
+ + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Hi Tomo, I added these test case to track how `fillnull` interacts with aggregate functions + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Hi Tomo, the new syntax is tested in `CalciteFillNullCommandIT` +These tests are added to test how `fillnull` command interacts with aggregate functions, I think they are optional but it can show how aggregate functions changed after using fillnull + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +I updated to remove these tests as they are not directly related to `fillnull` command + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Removed + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +Removed + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java:None` + + +I updated to remove these tests as they are not directly related to fillnull command + + +### @ahkcs on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +Moved Value to COMMAND ASSIST KEYWORDS + + +### @ahkcs on `docs/user/ppl/cmd/fillnull.rst:None` + + +Updated + + +### @ahkcs on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:294` + + +It was reusing the existing logic to anonymize. +The existing logic distinguishes between two syntaxes based on semantic behavior: + - Same values → with...in + - Different values → using + +However, value= syntax also applies the same value to all fields (like with...in), so we can't rely on this check alone to distinguish them. We need to track which syntax was originally used. + + Added a useValueSyntax boolean flag to the FillNull AST node: + - Set to true when parsing fillnull value=... 
+ - Set to false for with...in and using (default) + +This change might introduce some changes that is not clean so I think maybe it is also okay to keep using the existing logic since it only affects anonymizer test now. + +I updated the implementation to have the same syntax now for anonymizer test + + + + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java:None` + + +Hi Chen, I forgot to put it back after testing, restored + + +### @ahkcs on `docs/user/ppl/cmd/fillnull.rst:None` + + +Hi Chen, I added test cases in `CalciteFillNullCommandIT` to show the error message when users provide replacement value of different type + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java:None` + + +I feel this error message is confusing to users? Should we validate in analyzer layer and return clear error message like 'the replacement value type is not compatible ..."? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java:53` + + +Is this only for anonymizer? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java:53` + + +Yes, I think this change is debatable, you can refer to this comment: https://github.com/opensearch-project/sql/pull/4421#discussion_r2395563594 + + +### @RyanL1997 on `docs/user/ppl/cmd/fillnull.rst:None` + + +is this the new format of unified rst doc for ppl? cc @ritvibhatt + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java:None` + + +what is this? is this referencing to the following test case? + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java:None` + + ++1 + + +### @RyanL1997 on `docs/user/ppl/cmd/fillnull.rst:None` + + +question: since calcite is enabled by default, do we still need to emphasize this? 
cc @ritvibhatt + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java:None` + + +Yes, this is for testing the new value= syntax + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java:None` + + +nit: we can remove it, since the test name itself should explain the purpose + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java:None` + + +Updated to remove this + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4418: [SQL/PPL] Fix the `count(*)` and `dc(field)` to be capped at MAX_INTEGER #4416 + +**URL:** https://github.com/opensearch-project/sql/pull/4418 + +**Author:** @aalva500-prog + +**Created:** 2025-09-30T21:45:02Z + +**State:** MERGED + +**Merged:** 2025-10-01T04:00:51Z + +**Changes:** +129 -116 (14 files) + +**Labels:** `bug`, `PPL`, `backport 2.19-dev`, `engine v2`, `v3.3.0` + + +## Description + +### Description +Currently, SQL/PPL Count Query Is Maxed at MAX_INT + +For example, in the following query `requestId` is capped at the value of `max(integer)`, however, the actual number of `requestId` could be higher: +``` +{ +"query": ""source = accounts | stats count(requestId)"" +} +{ + "schema": [ + { + "name": "count(requestId)", + "type": "integer" + } + ], + "datarows": [ + [ + 2147483647 + ] + ], + "total": 1, + "size": 1 +} +``` + +This PR addresses the above issue by changing the return data type of the `count()` function from `integer` to `long`, so the new result will be as follows: +``` +{ +"query": ""source = accounts | stats count(requestId)"" +} +{ + "schema": [ + { + "name": "count(requestId)", + "type": "bigint" + } + ], + "datarows": [ + [ + 2147483647 + ] + ], + "total": 1, + "size": 1 +} +``` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my 
contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +lgtm, assuming tests pass + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +> https://github.com/opensearch-project/sql/actions/runs/18144320253/job/51642545887?pr=4418#step:6:575 + +Got +```bash +SQLBackwardsCompatibilityIT > testBackwardsCompatibility FAILED + java.lang.AssertionError: + Expected: iterable with items [(name=COUNT(*) FILTER(WHERE age > 35), alias=null, type=integer)] in any order + but: not matched: <{"name":"COUNT(*) FILTER(WHERE age > 35)","type":"long"}> + at __randomizedtesting.SeedInfo.seed([27E773C284B404C7:CCD84E0CFE6FD14D]:0) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:18) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:6) + + at org.opensearch.sql.util.MatcherUtils.verify(MatcherUtils.java:196) + at org.opensearch.sql.util.MatcherUtils.verifySchema(MatcherUtils.java:144) + at org.opensearch.sql.bwc.SQLBackwardsCompatibilityIT.verifySQLQueries(SQLBackwardsCompatibilityIT.java:190) + at org.opensearch.sql.bwc.SQLBackwardsCompatibilityIT.testBackwardsCompatibility(SQLBackwardsCompatibilityIT.java:126) + 1> [2025-09-30T17:51:35,784][INFO ][o.o.s.b.SQLBackwardsCompatibilityIT] [testBackwardsCompatibility] before test + 1> [2025-09-30T17:51:35,946][INFO ][o.o.s.b.SQLBackwardsCompatibilityIT] [testBackwardsCompatibility] initializing REST clients against [http://[::1]:36159, +``` + + +### @LantaoJin + + +Just a question: are we still need to fix v2 only bugs? Now the default engine migrate to v3 from 3.3.0, and this fixing seems no plan to backport to 3.1. 
+ + +### @RyanL1997 + + +> Just a question: are we still need to fix v2 only bugs? Now the default engine migrate to v3 from 3.3.0, and this fixing seems no plan to backport to 3.1. + + +Hi @LantaoJin, this is related to a serverless issue I think. Serverless is still on a customized version of 2.17 of sql which is still relying on the legacy engine. cc @aalva500-prog + + +### @aalva500-prog + + +Hi @LantaoJin, @RyanL1997 is correct, this issue was reported from serverless side. The latest OS version they use is 2.17 customized. I'm not sure when they plan to migrate to V3, but I don't think it will be in the near future. + + +--- + +# PR #4417: Version bump: 3.3 + +**URL:** https://github.com/opensearch-project/sql/pull/4417 + +**Author:** @Swiddis + +**Created:** 2025-09-30T21:29:39Z + +**State:** MERGED + +**Merged:** 2025-09-30T22:01:30Z + +**Changes:** +2 -2 (2 files) + +**Labels:** `skip-changelog`, `v3.3.0` + + +## Description + +### Description +Like #4036 except it actually works (read: includes @LantaoJin's fix) + +### Related Issues +#4037 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4415: [Backport 2.19-dev] Backport/backport 4334 to 2.19 dev + +**URL:** https://github.com/opensearch-project/sql/pull/4415 + +**Author:** @vamsimanohar + +**Created:** 2025-09-30T17:54:08Z + +**State:** MERGED + +**Merged:** 2025-09-30T19:37:45Z + +**Changes:** +197 -188 (17 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4414: [Backport 2.19-dev] Fix numbered token bug and make it optional output in patterns command (#4402) + +**URL:** https://github.com/opensearch-project/sql/pull/4414 + +**Author:** @songkant-aws + +**Created:** 2025-09-30T07:49:29Z + +**State:** MERGED + +**Merged:** 2025-09-30T16:13:49Z + +**Changes:** +1023 -417 (39 files) + + +## Description + +* Fix patterns command incorrectly parsing token and add parameter option + + + +* Fix some cases after merge + + + +* Fix failed tests + + + +* Fix UTs and modify docs + + + +* Add Brain parameter explanation to the doc + + + +* Modify patterns doctest after calcite default engine enabled + + + +* Rephrase some words in patterns.rst + + + +--------- + +### Description +Backport #4402 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4413: Fallback to sub-aggregation if composite aggregation doesn't support + +**URL:** https://github.com/opensearch-project/sql/pull/4413 + +**Author:** @qianheng-aws + +**Created:** 2025-09-30T05:25:11Z + +**State:** MERGED + +**Merged:** 2025-10-10T05:12:48Z + +**Changes:** +399 -113 (17 files) + +**Labels:** `enhancement`, `PPL`, `aggregation`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Fallback to sub-aggregation if composite aggregation doesn't support(e.g. auto_span). + +As sub-aggregation includes term agg which doesn't include null bucket, so we only fallback to push the aggregation when bucket_nullable=false. Otherwise, the aggregation will fail to push down and executes as a Calcite's Enumerable aggregate operator. + +This PR also refact and scale the optimization #4337 for bucket aggregation including single bucket and nested bucket(i.e. sub-agg). + +### Related Issues +Resolves #4338 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:47` + + +``` + LogicalAggregate + LogicalProject + LogicalFilter(condition=[IS NOT NULL($7)]) +``` +Shouldn't be following? 
+``` + final LogicalAggregate aggregate = call.rel(0); + final LogicalProject project = call.rel(1); + final LogicalFilter filter = call.rel(2); + final CalciteLogicalIndexScan scan = call.rel(3); +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:151` + + +why not "Agg-Project-Filter-TableScan"? I think `Agg-Project` should be treated as a single unit + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +can you create a named var for `countAggNameAndBuilderPair.getLeft()`? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +javadoc need to update + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +can you move it to package `org.opensearch.sql.exception`? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:47` + + +The original plan is +``` +LogicalAggregate + LogicalProject + LogicalFilter(condition=[IS NOT NULL($7)]) + LogicalProject + LogicalIndexScan +``` + +`ProjectFilterTransposeRule` and `ProjectMergeRule` will produce `agg-filter-project-scan` pattern. + +Maybe `FilterProjectTransposeRule` + `ProjectMergeRule` may produce `agg-project-filter-scan` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:151` + + +But `filter(isnotnull)` here is also derived from the `agg`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:47` + + +The pattern `agg-filter-project-scan` will make it easier to detect whether the filter is derived from the `agg`. 
The other pattern could work as well while we need to do additional transformation since the ref of field has passed through a `project` operator. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +This exception only happens here and will be caught internally. It doesn't need to be accessed outside of this file + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +done + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +done + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4412: [Backport 2.19-dev] [Feature][Enhancement] Enhance patterns command with additional sample_logs output field (#4155) + +**URL:** https://github.com/opensearch-project/sql/pull/4412 + +**Author:** @songkant-aws + +**Created:** 2025-09-30T05:09:02Z + +**State:** MERGED + +**Merged:** 2025-09-30T07:29:55Z + +**Changes:** +119 -42 (9 files) + + +## Description + +### Description +Backport #4155 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @songkant-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml:None` + + +This part should retain pushdown behavior + + +### @songkant-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml:None` + + +This line: https://github.com/opensearch-project/sql/blob/2.19-dev/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java#L25 prevents the pushdown behavior. Need to verify with the author who made the previous commit. + + +### @songkant-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml:None` + + +Verified it's not needed. Removing above line. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4411: [Backport 2.19-dev] Fix parse related functions return behavior in case of NULL input (#4381) + +**URL:** https://github.com/opensearch-project/sql/pull/4411 + +**Author:** @songkant-aws + +**Created:** 2025-09-30T02:00:09Z + +**State:** MERGED + +**Merged:** 2025-09-30T04:47:20Z + +**Changes:** +104 -72 (17 files) + + +## Description + +* Fix parse related functions return null values in case of null input + + + +* Fix some tests + + + +* Remove unused function registry + + + +--------- + +### Description +Backport #4381 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4408: [Backport 2.19-dev] Mod function should return decimal instead of float when handle the operands are decimal literal + +**URL:** https://github.com/opensearch-project/sql/pull/4408 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-29T11:00:33Z + +**State:** MERGED + +**Merged:** 2025-09-29T11:41:34Z + +**Changes:** +51 -0 (3 files) + + +## Description + +Backport a6d56265bb2e2a4db7744066b0900ba4dc56e96b from #4407. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4407: Mod function should return decimal instead of float when handle the operands are decimal literal + +**URL:** https://github.com/opensearch-project/sql/pull/4407 + +**Author:** @LantaoJin + +**Created:** 2025-09-29T10:25:25Z + +**State:** MERGED + +**Merged:** 2025-09-29T11:00:19Z + +**Changes:** +51 -0 (3 files) + +**Labels:** `bug`, `backport 2.19-dev`, `v3.3.0` + + +## Description + +### Description +Mod function should return decimal instead of float when handle the operands are decimal literal + +### Related Issues +Resolves #4406 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4404: [Backport 2.19-dev] Scale of decimal literal should always be positive in Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4404 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-29T09:14:05Z + +**State:** MERGED + +**Merged:** 2025-09-29T11:05:16Z + +**Changes:** +85 -11 (6 files) + + +## Description + +Backport d567ec8130680e73b4a64370f42edbb99e34a0ec from #4401. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4402: Fix numbered token bug and make it optional output in patterns command + +**URL:** https://github.com/opensearch-project/sql/pull/4402 + +**Author:** @songkant-aws + +**Created:** 2025-09-29T06:54:15Z + +**State:** MERGED + +**Merged:** 2025-09-30T04:12:20Z + +**Changes:** +1023 -417 (39 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR will resolve one feature enhancement and a bug fix. + +1. Feature: Since 3.1.0, patterns command for brain algorithm outputs numbered token like `.@` instead of `<*>.<*>@<*>`. But it's not always wanted per customer request. So add a new flag option `show_numbered_token=true|false` in the patterns command to make it optional. By default, it will not show numbered token to be compatible with V2's format. +2. Bug fix: In some cases, tokens are parsed incorrectly. The brain algorithm could generate pattern like: `<*> failed to start for block<*><*><*>`. It has continuous variable placeholder `<*>` but actually they can be merged into one. The parsing logic can't identify continuous variable placeholder. 
So at the final pattern generation part, we will merge continuous `<*>` placeholders. + +### Related Issues +Resolves #4364 , #4363 , #4366, #4362 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `docs/user/ppl/cmd/patterns.rst:34` + + +Seems when version <3.1.0, the command does not show numbered tokens. 3.1.0 <= version < 3.3.0, this command shows numbered tokens by default. when version >=3.3.0, align with the behaviour of version <3.1.0, right? + +Any thoughts to control the default value of `show_numbered_token` by `plugins.ppl.syntax.legacy.preferred` in future? + + +### @LantaoJin on `docs/user/ppl/cmd/patterns.rst:None` + + +`PPL` is meaning the query is not executable in doctest. Now [almost of PPL rst files moved to calcite enabled queue](https://github.com/opensearch-project/sql/pull/4379#discussion_r2387558074) since we have enabled calcite by default in main branch (3.3.0), please rebase and check if all queries can execute with calcite correctly. + + +### @LantaoJin on `docs/user/ppl/cmd/patterns.rst:None` + + +It is controlled by `docs/category.json` + + +### @songkant-aws on `docs/user/ppl/cmd/patterns.rst:None` + + +I changed doctest after merging main. After Calcite is enabled by default for doctest, patterns command output has some format change in this fix. So I changed doctest for patterns.rst. It's expected change. + +Grok and Parse doctest are actually not impacted. + + +### @songkant-aws on `docs/user/ppl/cmd/patterns.rst:34` + + +Will create the issue for it. Another suggestion from user is we could backport the change to older version. 
Not sure whether we need a minor version such as 3.1.x. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4401: Scale of decimal literal should always be positive in Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4401 + +**Author:** @LantaoJin + +**Created:** 2025-09-29T05:55:53Z + +**State:** MERGED + +**Merged:** 2025-09-29T09:13:51Z + +**Changes:** +85 -11 (6 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Calcite `makeExactLiteral()` cannot accept a decimal literal with a negative scale. +``` + public RexLiteral makeExactLiteral(BigDecimal bd) { + RelDataType relType; + int scale = bd.scale(); + assert scale >= 0; + ... +``` + +But when a Double value is converted to BigDecimal, its scale could be negative due to Double's scientific notation. For example, +``` + double a = 9223372036854775807.0000001; + BigDecimal b = BigDecimal.valueOf(a); +``` +`b.scale()` returns `-3` + +### Related Issues +Resolves #4391 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:578` + + +This line fixes the issue. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4400: Pushdown case function in aggregations as range queries + +**URL:** https://github.com/opensearch-project/sql/pull/4400 + +**Author:** @yuancu + +**Created:** 2025-09-28T10:31:34Z + +**State:** MERGED + +**Merged:** 2025-10-22T06:34:16Z + +**Changes:** +2211 -143 (42 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR push down [CASE](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/condition.rst#case) functions used in aggregations as range queries. + +For example, the query `source=bank | eval age_range = case (age < 30, 'u30', age < 40, 'u40' else 'u100') | stats avg(balance) by age_range` will be pushed down as the following OpenSearch DSL: +```json +{ + "aggregations": { + "age_range": { + "range": { + "field": "age", + "ranges": [ + { + "key": "u30", + "to": 30 + }, + { + "key": "u40", + "from": 30, + "to": 40 + }, + { + "key": "u100", + "from": 40 + } + ], + "keyed": true + }, + "aggregations": { + "avg(balance)": { + "avg": { + "field": "balance" + } + } + } + } + } +} +``` + +A CASE function used in aggregation can be pushed down only if it satisfied the following criteria: +- Result expressions must be string literals +- The field referenced in the condition must be the same +- Field references must be numeric +- Ranges must be closed-open intervals: $[a, b)$, $[a, +inf)$, or $(-inf, b)$. +- No further operations can be performed on the result of the case function + +**Limitations**: +- It only handle cases where the result expression is a string literal. E.g. `case(balance<10, 'poor' else 'rich')` will be pushed down, while `case(balance<10, 10 else 100)` won't. +- Red flag: range query will ignore `null` values. E.g. 
`eval b = case(balance<10, 'poor' else 'rich') | stats avg(balance) by b` will not properly handle cases when there are balance with `null` values. For `case` function, null values be categorized into the else group; while with pushed-down aggregation, rows with `null` balance will be ignored. +- With `case` function, the default else group is `null`. However, since `null` can not be a key for a range query, we substitute it with `"null"`. This can be fixed later by assigning a secret key to the else group, and substituting it later when parsing the response. + +**Examples of generated DSL** + +
    + +Case 1: Group by the case field only, with sub-aggregations + + +``` +source=bank | eval age_range = case (age < 30, 'u30', age < 40, 'u40' else 'u100') | stats avg(balance) by age_range +``` + +``` +RangeAgg + MetricAgg +``` + +```json +{ + "aggregations": { + "age_range": { + "range": { + "field": "age", + "ranges": [ + { + "key": "u30", + "to": 30 + }, + { + "key": "u40", + "from": 30, + "to": 40 + }, + { + "key": "u100", + "from": 40 + } + ], + "keyed": true + }, + "aggregations": { + "avg(balance)": { + "avg": { + "field": "balance" + } + } + } + } + } +} +``` +
    + +
    + +Case 2: Group by multiple ranges with sub-aggregations + + +``` +source=bank | eval age_range = case (age < 30, 'u30', age < 35, 'u35', age < 40, 'u40', age >= 40, 'u100'), balance_range = case(balance < 20000, 'medium' else 'high') | stats avg(balance) by age_range, balance_range +``` + +``` +RangeAgg + RangeAgg + MetricAgg +``` + +```json +{ + "aggregations": { + "age_range": { + "range": { + "field": "age", + "ranges": [ + { + "key": "u30", + "to": 30 + }, + { + "key": "u35", + "from": 30, + "to": 35 + }, + { + "key": "u40", + "from": 35, + "to": 40 + }, + { + "key": "u100", + "from": 40 + } + ], + "keyed": true + }, + "aggregations": { + "balance_range": { + "range": { + "field": "balance", + "ranges": [ + { + "key": "medium", + "to": 20000 + }, + { + "key": "high", + "from": 20000 + } + ], + "keyed": true + }, + "aggregations": { + "avg(balance)": { + "avg": { + "field": "balance" + } + } + } + } + } + } + } +} +``` +
    + +
    + +Case 3: Group by case field and keyword field + + +``` +source=bank | eval age_range = case (age < 30, 'u30', age < 35, 'u35', age < 40, 'u40', age >= 40, 'u100') | stats avg(balance), count() by firstname, lastname, age_range +``` + +``` +CompositeAgg + RangeAgg + MetricAgg +``` + +```json +{ + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "firstname": { + "terms": { + "field": "firstname", + "missing_bucket": true, + "missing_order": "first", + "order": "asc" + } + } + }, + { + "lastname": { + "terms": { + "field": "lastname", + "missing_bucket": true, + "missing_order": "first", + "order": "asc" + } + } + } + ] + }, + "aggregations": { + "age_range": { + "range": { + "field": "age", + "ranges": [ + { + "key": "u30", + "to": 30 + }, + { + "key": "u35", + "from": 30, + "to": 35 + }, + { + "key": "u40", + "from": 35, + "to": 40 + }, + { + "key": "u100", + "from": 40 + } + ], + "keyed": true + }, + "aggregations": { + "avg(balance)": { + "avg": { + "field": "balance" + } + } + } + } + } + } + } +} +``` +
    + +**TODOs**: + + +### Related Issues +Resolves #4201 , partially resolves #4338 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Does it mean we don't need `CompositeAggregationParser` anymore? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Is there any logic for handling range bucket when this exception happened? This part of code will construct composite agg unsupported bucket like `auto_date_histogram`. I'm wondering what will happens if both having `auto_span` + `range_bucket` in our query. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Why should we handle range bucket separately? Can it be handled in `createNestedBuckets` like how we construct auto_date_span? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Not related to this PR but #4413. It seems we have chance to optimize sub-agg by combining part of buckets into composite bucket if they're supported by composite agg. + +e.g. transform `termBucket-termBucket-autoDateSpanBucket` to `compositeBucket - autoDateSpanBucket`. + +We can also do bucket reorder to scale this optimization to more cases + +e.g. transform `termBucket-autoDateSpanBucket-termBucket` to `compositeBucket - autoDateSpanBucket`. 
+ + + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Please add test case like `bin timestamp bins=xxx | eval age_range = case ... | stats count() by timestamp, age_range` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Seems composite agg is more proper than constructing multiple sub-agg by `createNestedBuckets`. It looks good to me to keep the current implementation. https://github.com/opensearch-project/sql/pull/4400#discussion_r2425156433. + +We should refactor auto_date_span to have similar implementation but also have to use `createNestedBuckets` if both have range_bucket and auto_date_span. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Yes, I'm thinking of retiring `CompositeAggregationParser` since a composite aggregation is essentially also a bucket aggregation. By keeping only `CompositeAggregationParser`, we can avoid duplicating codes to handle subaggregations,etc. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +I haven't handled the case where auto date histogram and range bucket coexists. Currently, it will treat `age_range` as a sub-term aggregation with script. + +
    + +explain results for `source=time_test | bin timestamp bins=3 | eval value_range = case(value < 7000, 'small' else 'great') | stats bucket_nullable=false avg(value) by timestamp, value_range` + + + +``` +{ + "calcite": { + "logical": """LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$2], timestamp=[$0], value_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg(value)=[AVG($2)]) + LogicalProject(timestamp=[$9], value_range=[$10], value=[$2]) + LogicalFilter(condition=[IS NOT NULL($9)]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], timestamp=[WIDTH_BUCKET($3, 3, -(MAX($3) OVER (), MIN($3) OVER ()), MAX($3) OVER ())], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'great':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, time_test]]) +""", + "physical": """EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, time_test]], PushDownContext=[[AGGREGATION->rel#2751:LogicalAggregate.NONE.[](input=RelSubset#2693,group={1, 2},avg(value)=AVG($0)), PROJECT->[avg(value), timestamp, value_range]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQERHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiQHRpbWVzdGFtcCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImNhdGVnb3J5IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogInZhbHVlIgogICAgfSwKICAgIHsKICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogInRpbWVzdGFtcCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0Atp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICA
gICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAyLAogICAgICAgICAgIm5hbWUiOiAiJDIiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDcwMDAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJzbWFsbCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJncmVhdCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAABHQACkB0aW1lc3RhbXBzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3QAD0xqYXZhL3V0aWwvTWFwO3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAJVElNRVNUQU1QfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AE3QABERhdGVzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABp4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABwAAAAAc3IAE2phdmEudXR
pbC5BcnJheUxpc3R4gdIdmcdhnQMAAUkABHNpemV4cAAAAAF3BAAAAAF0ABdkYXRlX2hvdXJfbWludXRlX3NlY29uZHh0AAhjYXRlZ29yeX5xAH4AEnQABlNUUklOR3QABXZhbHVlfnEAfgASdAAHSU5URUdFUnQACXRpbWVzdGFtcHNxAH4AC3EAfgAUcQB+ABdxAH4AG3NxAH4AHwAAAAF3BAAAAAF0ABdkYXRlX2hvdXJfbWludXRlX3NlY29uZHh4AHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1760342401776012000}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":{"_key":"asc"}},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +""" + } +} +``` + +
    + +I'll optimize this case and move the snippets of creating range buckets to `createNestedBuckets` + + +### @LantaoJin on `docs/user/ppl/functions/condition.rst:233` + + +IMO, it's not a limitation of `case` function, it is just a restricted optimization. We can just call out in what case, the `case` function would be optimized to range DSL. Can we add some optimizable `case` usages in user doc? + + +### @LantaoJin on `docs/user/ppl/functions/condition.rst:None` + + +remove this + + +### @yuancu on `docs/user/ppl/functions/condition.rst:233` + + +I think the stated conditions are in the scope of restricted optimizations, but the limitations are not because we will still do the optimization regardless of whether it has null values in its column or whether there is a default NULL range. + +The problem is that there is no way to know in advance whether there exists null values in a column. Therefore, if we do this optimization, we always risk the discrepancy in results of with & without push-down. + + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Fixed with the latest implementation + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Fixed + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Fixed & test case added + + +### @yuancu on `docs/user/ppl/functions/condition.rst:None` + + +Removed + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +[non-blocking] Should this part of code be put in the second branch of the below `if` for better readability? It should have structure like: +``` +if (aggregate.getGroupSet().isEmpty()) { + // no group by +} else { + // Push auto date span & case in group-by list into nested aggregations + ... + ... 
+ if (groupList.isEmpty()) { + // No composite aggregation at top-level + ... + } else { + // Composite aggregation at top level + ... + } +} +``` + +It works well with current code but performing useless operations on some empty collections for no-group-by. + + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:None` + + +Put this at the beginning of this method to skip meaningless operations in advance? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:None` + + +Should there be another `else` here? Otherwise it will put the `bucket.getKey` into the results for composite agg as well. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:None` + + +Please include `InternalAutoDateHistogram.Bucket` here as well to skip empty bucket for auto_date_span. + +And also remove the code https://github.com/opensearch-project/sql/blob/1e62fba637e67f2c30de312ea0fa6ca965104b37/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java#L312-L329 here. Previously we add another filter to filter out the empty bucket. + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java:404` + + +Please add test case for `composite - auto_date_span - range - metric`. Set bins to a big enough value like 100 to verify whether the empty buckets are filtered out properly. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java:404` + + +Test added + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Thanks for the suggestion! 
optimized the logic here + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:None` + + +Refactored as suggested + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java:None` + + +Yep. I used to put all contents in `bucket.getKey` to pass unit tests in a wrong way; corrected the behavior now. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +should change to `explainQueryYaml` + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Fixed. Thanks for reminding! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4399: [Backport 2.19-dev] Enhance the cost computing mechanism and push down context #4353 + +**URL:** https://github.com/opensearch-project/sql/pull/4399 + +**Author:** @qianheng-aws + +**Created:** 2025-09-28T04:17:42Z + +**State:** MERGED + +**Merged:** 2025-09-28T07:11:41Z + +**Changes:** +2403 -1281 (170 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4353 commit https://github.com/opensearch-project/sql/commit/585a04c5de2dedcb3e7c2abf130846caac4dee50) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4398: [Backport 2.19-dev] Push down stats with bins on time field into auto_date_histogram (#4329) + +**URL:** https://github.com/opensearch-project/sql/pull/4398 + +**Author:** @qianheng-aws + +**Created:** 2025-09-28T02:26:39Z + +**State:** MERGED + +**Merged:** 2025-09-28T03:31:12Z + +**Changes:** +228 -10 (9 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + 
+### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4396: [Backport 2.19-dev] Update bin.rst and add `bin` to doctest (#4384) + +**URL:** https://github.com/opensearch-project/sql/pull/4396 + +**Author:** @ahkcs + +**Created:** 2025-09-26T22:53:27Z + +**State:** MERGED + +**Merged:** 2025-09-29T18:23:19Z + +**Changes:** +130 -123 (6 files) + + +## Description + +(cherry picked from #4384 commit 877e6d586287171019b4390231accd7f2a887c2a) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4395: [Backport 2.19-dev] Fix `ClassCastException` for value-storing aggregates on nested PPL fields (#4360) + +**URL:** https://github.com/opensearch-project/sql/pull/4395 + +**Author:** @ahkcs + +**Created:** 2025-09-26T22:51:22Z + +**State:** MERGED + +**Merged:** 2025-09-29T16:34:03Z + +**Changes:** +342 -1 (6 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4360 commit e92bf68f67577ba42112e47232ffe86f6c32c909) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4393: Refactor name resolution in Calcite PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4393 + +**Author:** @ykmr1224 + +**Created:** 2025-09-26T18:24:44Z + 
+**State:** MERGED + +**Merged:** 2025-10-08T22:27:33Z + +**Changes:** +536 -128 (8 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Made full refactoring on name resolution in Calcite PPL. +- Now it supports names containing `.`, and access to struct field. +- It is needed to support `spath` command with dynamic columns. (I will add logic to resolve dynamic columns in separate PRs) +- I found current logic won't verify the table/alias name while resolving field from correlation, and I couldn't find good way to refer the available table/alias name from correlation variable. We might need to introduce abstraction of stack of scopes to better model the name resolution. + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4307 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - COMMENTED + + +Thanks for refactoring! + + +### @noCharger - APPROVED + + +Nit: Keep a debug log at the entry/exit point of resolve(), showing the incoming qualified name and the chosen resolution path. Remove or greatly reduce logging in private helpers unless it covers an edge case that is hard to reproduce or diagnose. + +If you want to retain some logs for future debugging, comment them with a reference to the bug or scenario they address. + + +## Review Comments + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java:98` + + +Why remove the possible fields list? security concern? 
+ + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java:98` + + +It is because it could be incorrect, since it was not considering multiple inputs (join) nor correlation. + + +## General Comments + + +### @ykmr1224 + + +> Nit: Keep a debug log at the entry/exit point of resolve(), showing the incoming qualified name and the chosen resolution path. Remove or greatly reduce logging in private helpers unless it covers an edge case that is hard to reproduce or diagnose. +> +> If you want to retain some logs for future debugging, comment them with a reference to the bug or scenario they address. + +Does `debug` level log still matter? + + +--- + +# PR #4389: [Backport 2.19-dev] Fix bug of missed analyzed node when pushdown filter for Search call (#4388) + +**URL:** https://github.com/opensearch-project/sql/pull/4389 + +**Author:** @LantaoJin + +**Created:** 2025-09-26T06:33:10Z + +**State:** MERGED + +**Merged:** 2025-09-26T07:50:01Z + +**Changes:** +40 -5 (3 files) + + +## Description + +(cherry picked from #4388 commit 06fa7d7ba8f9a7464f3bcbaef4dcd78803cec64b) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4388: Fix bug of missed analyzed node when pushdown filter for Search call + +**URL:** https://github.com/opensearch-project/sql/pull/4388 + +**Author:** @LantaoJin + +**Created:** 2025-09-26T05:51:36Z + +**State:** MERGED + +**Merged:** 2025-09-26T06:21:43Z + +**Changes:** +40 -13 (3 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Fix bug of missed analyzed node when pushdown filter for Search call + +### Related Issues +Resolves #4387 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4386: [Backport 2.19-dev] Prevent limit pushdown before action building instead of in action executing + +**URL:** https://github.com/opensearch-project/sql/pull/4386 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-26T02:28:46Z + +**State:** MERGED + +**Merged:** 2025-09-26T05:15:13Z + +**Changes:** +52 -7 (5 files) + + +## Description + +Backport 18f2280c1b28a4e088ccdc73123635632ba0f06e from #4377. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4384: Update bin.rst and add `bin` to doctest + +**URL:** https://github.com/opensearch-project/sql/pull/4384 + +**Author:** @ahkcs + +**Created:** 2025-09-25T23:03:58Z + +**State:** MERGED + +**Merged:** 2025-09-26T06:13:59Z + +**Changes:** +129 -123 (6 files) + +**Labels:** `documentation`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Update the doctest for bin.rst file +Removed floating point for numeric span + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `docs/user/ppl/cmd/bin.rst:None` + + +Not critical, but these examples might be a bit confusing since it shows duplicate age bins. It is better showing other field to indicate `age` is just showing the bin for each record. + + +### @ykmr1224 on `doctest/test_docs.py:None` + + +Let's reuse existing ones if possible. I think bank can be replaced with accounts, at least. 
Not sure if we have a JSON file with timestamps. + + +### @ykmr1224 on `docs/category.json:None` + + +Let's put items in alphabetical order to easily identify duplicate records. + + +### @vamsimanohar on `doctest/test_docs.py:None` + + ++1, let's reuse unless necessary. + + +### @ahkcs on `docs/user/ppl/cmd/bin.rst:None` + + +Updated some tests to use fields command + + +### @ahkcs on `doctest/test_docs.py:None` + + +We currently don't have json with timestamp so I kept time_test.json, removed bank.json + + +### @ahkcs on `docs/category.json:None` + + +Updated + + +## General Comments + + +### @LantaoJin + + +#4379 is moving all doctests to Calcite. But I can rebase that one. Let's merge this first. + + +--- + +# PR #4382: Update timechart in SPL/PPL cheat sheet + +**URL:** https://github.com/opensearch-project/sql/pull/4382 + +**Author:** @penghuo + +**Created:** 2025-09-25T19:17:37Z + +**State:** MERGED + +**Merged:** 2025-09-25T21:27:17Z + +**Changes:** +4 -4 (1 files) + +**Labels:** `documentation`, `PPL` + +**Assignees:** @penghuo + + +## Description + +### Description +Update timechart in SPL/PPL cheat sheet + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4381: Fix parse related functions return behavior in case of NULL input + +**URL:** https://github.com/opensearch-project/sql/pull/4381 + +**Author:** @songkant-aws + +**Created:** 2025-09-25T14:15:21Z + +**State:** MERGED + +**Merged:** 2025-09-26T09:02:42Z + +**Changes:** +103 -71 (17 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix the bug when input string is NULL, parse/grok/patterns(simple_pattern) returns NULL value instead of empty string. This is to align with V2 behaviors described in the docs. It will reduce breaking changes as well. + +### Related Issues +Resolves #4380 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/utils/ParseUtils.java:33` + + +q: `INTERNAL_REGEXP_EXTRACT` will never be used with the patch? + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/utils/ParseUtils.java:33` + + +Yes, it's not used anymore. Will quickly remove it. + + +## General Comments + + +### @LantaoJin + + +Will the patch change the behavior of `rex` command? + + +### @songkant-aws + + +> Will the patch change the behavior of rex command? + +@LantaoJin No, `rex` command uses its own UDFs and couple of other Calcite internal functions. Its logic is more complex. 
+ + +### @LantaoJin + + +Need to backport manually @songkant-aws + + +--- + +# PR #4379: [Doc] Enable doctest with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4379 + +**Author:** @LantaoJin + +**Created:** 2025-09-25T09:15:01Z + +**State:** MERGED + +**Merged:** 2025-09-29T16:35:06Z + +**Changes:** +237 -278 (8 files) + +**Labels:** `documentation`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `v3.3.0` + + +## Description + +### Description +Followup of #4372 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `docs/user/ppl/functions/math.rst:588` + + +For the double values which are out of the double's precision. Keep the behavior same as Java's `Math.floor(double)` and Splunk's `floor()` math function. + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:119` + + +not sure why this query failed in linux/doctest only. I cannot reproduce in local and IT. Will keep the `PPL>` to avoid failure. 
+``` + ppl_cmd.process("source = state_country as a | where country = 'USA' OR country = 'England' | left join ON a.name = b.name [ source = occupation | where salary > 0 | fields name, country, salary | sort salary | head 3 ] as b | stats avg(salary) by span(age, 10) as age_span, b.country")Expected: + fetched rows / total rows = 3/3 + +-------------+----------+-----------+ + | avg(salary) | age_span | b.country | + |-------------+----------+-----------| + | null | 40 | null | + | 70000.0 | 30 | USA | + | 100000.0 | 70 | England | + +-------------+----------+-----------+ +Got: + fetched rows / total rows = 2/2 + +-------------+----------+-----------+ + | avg(salary) | age_span | b.country | + |-------------+----------+-----------| + | 70000.0 | 30 | USA | + | 100000.0 | 70 | England | + +-------------+----------+-----------+ +---------------------------------------------------------------------- +``` + + +### @LantaoJin on `docs/category.json:12` + + +I see the https://github.com/opensearch-project/sql/issues/4380 closed. but grok, parse, patterns are still not able to 100% match the result of doctest in v2. @songkant-aws can you fix it or link an issue. Or it just needs to update the doc. + + +### @LantaoJin on `docs/user/ppl/cmd/timechart.rst:97` + + +@selsong , when I change `PPL>` to `os>` in timechart.rst. I found many results cannot not match the expected results added in doc. `timechart` command is a Calcite enabled only command, which means the results in this file shouldn't be impacted by the action of setting calcite enabled as default. Please check why the original expected results cannot match the actual ones. cc @penghuo + + +### @LantaoJin on `docs/user/ppl/cmd/timechart.rst:143` + + +@selsong there are many tests which cannot run in timechart.rst, see the queries starting with `PPL>`. cc @penghuo + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:97` + + +Add issue to track. 
https://github.com/opensearch-project/sql/issues/4409 + + +### @penghuo on `docs/category.json:12` + + +Reopen https://github.com/opensearch-project/sql/issues/4380 + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4378: Change the default search sort tiebreaker to `_shard_doc` for PIT search + +**URL:** https://github.com/opensearch-project/sql/pull/4378 + +**Author:** @LantaoJin + +**Created:** 2025-09-25T06:49:51Z + +**State:** MERGED + +**Merged:** 2025-10-01T02:39:54Z + +**Changes:** +140 -185 (98 files) + +**Labels:** `enhancement`, `backport-manually`, `v3.3.0` + + +## Description + +### Description +Change the default search sort tiebreaker of PIT search requests to `_shard_doc` after https://github.com/opensearch-project/OpenSearch/pull/18924 being merged. + +Main changes: +1. Remove sort field `_doc` in pushDownFilter(). (`_doc` is a default hits ordering if no specific orders are set) +2. Explicitly add `_shard_doc` as sort tiebreaker of PIT search (~~not 100% sure the current implementation of `_shard_doc` is an implicit sort in PIT search, see~~ [question](https://github.com/opensearch-project/OpenSearch/pull/18924#issuecomment-3342365950)). +3. Keep the legacy engine unchanged. + +### Related Issues +Resolves #3064 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Just to confirm: Do all PIT search requests add `_shard_doc` as tiebreaker field implicitly if no sort provided? 
+ + +## Review Comments + + +### @LantaoJin on `docs/user/optimization/optimization.rst:47` + + +Almost all of the changes in rst, json and yaml files are removing `sort _doc`; even in non-PIT search requests, the `sort _doc` is not necessary. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java:210` + + +This line is the primary change of this PR. +Currently, the sort tiebreaker `_shard_doc` is not implicitly added in PIT search request. So we explicitly sort with `SortBuilders.shardDocSort()`. + + +## General Comments + + +### @LantaoJin + + +Is it a must have for 3.3.0? + + +### @LantaoJin + + +> Just to confirm: Do all PIT search requests add `_shard_doc` as tiebreaker field implicitly if no sort provided? + +I think all PIT search requests should add `_shard_doc` as tiebreaker, or the `_score` will be used as implicit sort field in response. But this PR doesn't check all code base, it just changes all the places which use `_doc` + `_id` as tiebreaker to `_shard_doc`. + +I checked PR of `_shard_doc`, seems only PIT search requests can sort by `_shard_doc`. Non-PIT search requests sort by `_shard_doc` throws exception. I will refactor this PR to force v2 and v3 only. The v1 (legacy) code is out of my knowledge, I won't touch them in this PR. + + +### @LantaoJin + + +https://github.com/opensearch-project/OpenSearch/pull/18924 merged just now. So I tested in local that all IT are passed except `GeoIpFunctionsIT` mentioned [here](https://github.com/opensearch-project/sql/pull/4036#issuecomment-3349991642). + +So when https://github.com/opensearch-project/sql/pull/4036 merged and the OpenSearch 3.3.0-SNAPSHOT published with latest code. The CI of this PR should be passed. +cc @penghuo @Swiddis + + +### @Swiddis + + +https://github.com/opensearch-project/sql/pull/4420 still fails tests after merge + + +### @LantaoJin + + +> #4420 still fails tests after merge + +Seems #4420 contains some changes that are not in this PR. 
Let me update from upstream and run CI again. + + +### @RyanL1997 + + +> https://github.com/opensearch-project/sql/actions/runs/18148240816/job/51654012596?pr=4378#step:6:481 + +```bash +CalciteNoPushdownIT > org.opensearch.sql.calcite.remote.CalciteSearchCommandIT.testSearchWithComplexChainedExpressions FAILED +REPRODUCE WITH: ./gradlew ':integ-test:integTest' --tests 'org.opensearch.sql.calcite.remote.CalciteSearchCommandIT.testSearchWithComplexChainedExpressions' -Dtests.seed=FD9C48B5A496F6A0 -Dtests.security.manager=false -Dtests.locale=ff-SN -Dtests.timezone=America/Santo_Domingo -Druntime.java=21 + java.lang.AssertionError: expected:<0> but was:<1> + at __randomizedtesting.SeedInfo.seed([FD9C48B5A496F6A0:5AB6F276A0DA7BAD]:0) + at org.junit.Assert.fail(Assert.java:89) + at org.junit.Assert.failNotEquals(Assert.java:835) + at org.junit.Assert.assertEquals(Assert.java:647) + at org.junit.Assert.assertEquals(Assert.java:633) + at org.opensearch.sql.util.MatcherUtils.verifyNumOfRows(MatcherUtils.java:188) + at org.opensearch.sql.ppl.SearchCommandIT.testSearchWithComplexChainedExpressions(SearchCommandIT.java:1316) + at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103) + at java.base/java.lang.reflect.Method.invoke(Method.java:580) + at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) + at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) + at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) + at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at org.junit.rules.RunRules.evaluate(RunRules.java:20) + at org.apache.lucene.tests.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:48) + at 
org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) + at org.apache.lucene.tests.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:45) + at org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) + at org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) + at org.junit.rules.RunRules.evaluate(RunRules.java:20) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) + at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) + at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) + at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) + at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) + at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) + at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) + at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at org.apache.lucene.tests.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:38) + at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) + at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at org.apache.lucene.tests.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) + at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) + at org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) + at org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) + at org.apache.lucene.tests.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:47) + at org.junit.rules.RunRules.evaluate(RunRules.java:20) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) + at java.base/java.lang.Thread.run(Thread.java:1583) +``` + + +### @RyanL1997 + + +this is the same failure on #4344 + + +### @LantaoJin + + +> > https://github.com/opensearch-project/sql/actions/runs/18148240816/job/51654012596?pr=4378#step:6:481 +> +> ```shell +> CalciteNoPushdownIT > org.opensearch.sql.calcite.remote.CalciteSearchCommandIT.testSearchWithComplexChainedExpressions FAILED +> REPRODUCE WITH: ./gradlew ':integ-test:integTest' --tests 'org.opensearch.sql.calcite.remote.CalciteSearchCommandIT.testSearchWithComplexChainedExpressions' -Dtests.seed=FD9C48B5A496F6A0 -Dtests.security.manager=false -Dtests.locale=ff-SN -Dtests.timezone=America/Santo_Domingo -Druntime.java=21 +> java.lang.AssertionError: expected:<0> but was:<1> +> at __randomizedtesting.SeedInfo.seed([FD9C48B5A496F6A0:5AB6F276A0DA7BAD]:0) +> at org.junit.Assert.fail(Assert.java:89) +> at org.junit.Assert.failNotEquals(Assert.java:835) +> at org.junit.Assert.assertEquals(Assert.java:647) +> at org.junit.Assert.assertEquals(Assert.java:633) +> at org.opensearch.sql.util.MatcherUtils.verifyNumOfRows(MatcherUtils.java:188) +> at 
org.opensearch.sql.ppl.SearchCommandIT.testSearchWithComplexChainedExpressions(SearchCommandIT.java:1316) +> at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103) +> at java.base/java.lang.reflect.Method.invoke(Method.java:580) +> at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) +> at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) +> at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) +> at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) +> at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) +> at org.junit.rules.RunRules.evaluate(RunRules.java:20) +> at org.apache.lucene.tests.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:48) +> at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) +> at org.apache.lucene.tests.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:45) +> at org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) +> at org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) +> at org.junit.rules.RunRules.evaluate(RunRules.java:20) +> at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) +> at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) +> at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) +> at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) +> at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) +> at 
com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) +> at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) +> at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) +> at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) +> at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) +> at org.apache.lucene.tests.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:38) +> at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) +> at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) +> at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) +> at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) +> at org.apache.lucene.tests.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) +> at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) +> at org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) +> at org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) +> at org.apache.lucene.tests.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:47) +> at org.junit.rules.RunRules.evaluate(RunRules.java:20) +> at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) +> at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) +> at java.base/java.lang.Thread.run(Thread.java:1583) +> ``` + +@RyanL1997 @Swiddis , confirmed with @yuancu , seems a test bug triggered at Oct 
1st. To unblock our release, I have ignored it in the last commit and @yuancu will look into it offline. + + +### @LantaoJin + + +Reminder: Do not backport it to 2.19-dev since it calls an API only provided in OS core 3.3.0. + + +### @penghuo + + +> > Just to confirm: Do all PIT search requests add `_shard_doc` as tiebreaker field implicitly if no sort provided? +> +> I think all PIT search requests should add `_shard_doc` as tiebreaker, or the `_score` will used as implicit sort field in response. But this PR doesn't check all code base, it just changes the all places which use `_doc` + `_id` as tiebreaker to `_shard_doc`. +> +> I checked PR of `_shard_doc`, seems only PIT search requests can sort by `_shard_doc`. Non-PIT search requests sort by `_shard_doc` throws exception. I will refactor this PR to force v2 and v3 only. The v1 (legacy) code is out of my knowledge, I won't touch them in this PR. + +For search=index query, What is performance difference sort[_doc, _id], vs sort[_shard_doc] ? +Should we use `_shard_docs` as tiebreaker only? + + +--- + +# PR #4377: Prevent limit pushdown before action building instead of in action executing + +**URL:** https://github.com/opensearch-project/sql/pull/4377 + +**Author:** @LantaoJin + +**Created:** 2025-09-25T05:58:21Z + +**State:** MERGED + +**Merged:** 2025-09-26T01:19:45Z + +**Changes:** +44 -0 (5 files) + +**Labels:** `bug`, `pushdown`, `backport 2.19-dev`, `v3.3.0` + + +## Description + +### Description +https://github.com/opensearch-project/sql/pull/3713 tried to prevent push down limit with offset reach maxResultWindow in action being executing (applying) which didn't work in v3. + +Fixing: Prevent limit pushdown before action building instead of in action executing + +### Related Issues +Resolves #4376 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +This blocks CI workflow in #4372 + + +--- + +# PR #4375: [Experimental] Support direct query data sources + +**URL:** https://github.com/opensearch-project/sql/pull/4375 + +**Author:** @lezzago + +**Created:** 2025-09-24T22:07:15Z + +**State:** MERGED + +**Merged:** 2025-09-26T16:45:46Z + +**Changes:** +10475 -489 (98 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Support for queries other data sources like prometheus via REST API. +PR is a merge from [feature/direct-query-prometheus](https://github.com/opensearch-project/sql/tree/feature/direct-query-prometheus). It is not a straight merge as that PR shows more changes than there are: #4357. 
+ +Most changes are isolated in the `direct-query` and `direct-query-core` modules + +Introduces 3 new APIs: +- Direct query: Query non-opensearch datasources like Prometheus with PromQL + - uri: `GET /_plugins/_directquery/_query/{dataSources}` +- Read resources: Fetch resources from non-opensearch datasources like Prometheus to fetch labels/series/alerts + - base uri: `GET /_plugins/_directquery/_resources/{dataSource}` + - example: `GET /_plugins/_directquery/_resources/{dataSource}/api/v1/{resourceType}/{resourceName}/values` +- Write resources: Write resources to non-opensearch datasources like Prometheus to put alert manager silences + - base uri: `POST /_plugins/_directquery/_resources/{dataSource}` + - example: `POST /_plugins/_directquery/_resources/{dataSource}/alertmanager/api/v2/{resourceType}` + +### Related Issues +#4229 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @joshuali925 - COMMENTED + + +> It is not a straight merge as that PR shows more changes than there are: https://github.com/opensearch-project/sql/pull/4357 + +Probably because I squashed https://github.com/opensearch-project/sql/pull/4374, GitHub had conflict to rebase it and merge is disabled here. Squash doesn't really merge it with main, and that commit content will show in the `diff main...feature/direct-query-prometheus` during review. + +If you want to send the PR from feature branch let me know, i might be able to push the merge commit. But if not this works as well + + +### @Swiddis - APPROVED + + +I looked over many of the original PRs going into this and don't have major architectural concerns. 
I'm okay to merge this as-is and address any smaller issues later. (Note: haven't carefully looked over the code here, this is a very large change.) + +I'd like if you could add testing details for this though: what's the testing plan for the new module? How is it different from our current one? Any known bugs/operational changes we should be aware of? + + +## Review Comments + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/format/DirectQueryResourcesRequestConverter.java:None` + + +nit. remove comments? + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/ExecuteDirectQueryActionRequest.java:36` + + +i had the question since beginning: are these actionable TODO items, or are we ok to just leave them? I believe this is for transport layer communications, which we don't have a use case, so "real implementation" would mean when this request is being used at transport layer? + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/GetDirectQueryResourcesActionRequest.java:None` + + +do we want to call this ReadDirectQueryResources since we are adding Write? + + +### @joshuali925 on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/ExecuteDirectQueryRequest.java:32` + + +i don't think we need CW comments here? also might be better to use javadoc syntax + + +### @joshuali925 on `direct-query-core/src/main/java/org/opensearch/sql/directquery/DirectQueryExecutorServiceImpl.java:40` + + +direct query (non async query) shouldn't have a query id? + + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/ExecuteDirectQueryActionRequest.java:36` + + +We have no current requirements for transport layer. We want to support this when it's GA, but there will be a bit of refactoring needed, so it was not worth implementing this yet. 
+ + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/format/DirectQueryResourcesRequestConverter.java:None` + + +ack + + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/GetDirectQueryResourcesActionRequest.java:None` + + +ack + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/ExecuteDirectQueryRequest.java:32` + + +ack + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/directquery/DirectQueryExecutorServiceImpl.java:40` + + +I was thinking of keeping this for now for the purpose of tracing the call to debug errors. + + +## General Comments + + +### @lezzago + + +> Probably because I squashed #4374, GitHub had conflict to rebase it and merge is disabled here. Squash doesn't really merge it with main, and that commit content will show in the `diff main...feature/direct-query-prometheus` during review. +> +> If you want to send the PR from feature branch let me know, i might be able to push the merge commit. But if not this works as well + +I am fine with either choice. If this would be a preferred approach, feel free to try to push a merge commit to fix that. + + +--- + +# PR #4374: Merge changes from main + +**URL:** https://github.com/opensearch-project/sql/pull/4374 + +**Author:** @lezzago + +**Created:** 2025-09-24T19:46:37Z + +**State:** MERGED + +**Merged:** 2025-09-24T20:24:00Z + +**Changes:** +6349 -1480 (276 files) + + +## Description + +### Description +Merge changes from main, so there can be a clean PR to merge the changes from the feature branch to main. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4372: Enable Calcite by default and implicit fallback the unsupported commands + +**URL:** https://github.com/opensearch-project/sql/pull/4372 + +**Author:** @LantaoJin + +**Created:** 2025-09-24T11:44:37Z + +**State:** MERGED + +**Merged:** 2025-09-28T07:12:41Z + +**Changes:** +249 -174 (34 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev`, `v3.3.0` + + +## Description + +### Description +1. Set the default value of `plugins.calcite.enabled` to true +2. Implicit to fallback to v2 for any failure with `CalciteUnsupportException`: + https://github.com/opensearch-project/sql/blob/d6394dff93a2b8ae458137bfec8674dbfc0c6821/docs/user/ppl/limitations/limitations.rst#unsupported-functionalities-in-calcite-engine + +### Related Issues +Resolves #4314 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - COMMENTED + + +~~We should remove combine ppl_cli and ppl_cli_calcite in category.json.~~ + +I see. it tracked in seperate PR. https://github.com/opensearch-project/sql/pull/4377 + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/information_schema.rst:None` + + +information_schema is not a command. @vamsimanohar please help confirm. 
if it is not a command, we should remove it from command list. + + +### @vamsimanohar on `docs/user/ppl/cmd/information_schema.rst:None` + + +You are right, it's not a command. information_schema is a default schema for metadata information. ideally this should be part of describe command. +Let's remove it. +Prometheus is also moving to direct_query_plugin. Shouldn't be a big issue if we remove it. + + + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:266` + + +Do we have a list of command or common cases that may trigger fallback? Just a little concern the fallback logic become more complicated (Calcite -> V2 -> legacy). + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:266` + + +> Do we have a list of command or common cases that may trigger fallback? Just a little concern the fallback logic become more complicated (Calcite -> V2 -> legacy). + +[Unsupported Functionalities in Calcite Engine](https://github.com/opensearch-project/sql/blob/d6394dff93a2b8ae458137bfec8674dbfc0c6821/docs/user/ppl/limitations/limitations.rst#unsupported-functionalities-in-calcite-engine) + + +### @LantaoJin on `docs/user/ppl/cmd/information_schema.rst:None` + + +`cmd/information_schema.rst` was removed and its content was moved to `ppl/admin/datasources.rst` + + +## General Comments + + +### @LantaoJin + + +``` +RestHandlerClientYamlTestSuiteIT > test {yaml=issues/3102/Prevent push down limit if the offset reach max_result_window} FAILED + java.lang.AssertionError: Failure at [issues/3102:26]: expected [2xx] status code but api [ppl] returned [500 Internal Server Error] [{ + "error": { + "reason": "There was internal problem at backend", + "details": "java.sql.SQLException: Error while preparing plan [LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(id=[$0])\n LogicalSort(offset=[1], fetch=[1])\n CalciteLogicalIndexScan(table=[[OpenSearch, test]])\n]", + "type": "RuntimeException" + 
}, + "status": 500 + }] +``` +This CI failure is not related. It's a bug triggered by enabling Calcite. https://github.com/opensearch-project/sql/pull/4377 will fix it. + + + +--- + +# PR #4371: [Backport 2.19-dev] Optimize count aggregation performance by utilizing native doc_count in v3 (#4337) + +**URL:** https://github.com/opensearch-project/sql/pull/4371 + +**Author:** @LantaoJin + +**Created:** 2025-09-24T10:00:20Z + +**State:** MERGED + +**Merged:** 2025-09-24T12:23:20Z + +**Changes:** +892 -69 (59 files) + + +## Description + +(cherry picked from #4337 commit 8c417f4f7acafa702a405ad363b5ed3fb3c75810) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4370: [Backport 2.19-dev] No index found with given index pattern should throw IndexNotFoundException + +**URL:** https://github.com/opensearch-project/sql/pull/4370 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-24T09:01:11Z + +**State:** MERGED + +**Merged:** 2025-09-24T09:38:12Z + +**Changes:** +81 -1 (5 files) + + +## Description + +Backport f2db1212b2d8db718e2ef97bd6197bdecc7c7351 from #4369. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4369: No index found with given index pattern should throw IndexNotFoundException + +**URL:** https://github.com/opensearch-project/sql/pull/4369 + +**Author:** @LantaoJin + +**Created:** 2025-09-24T07:00:58Z + +**State:** MERGED + +**Merged:** 2025-09-24T09:00:55Z + +**Changes:** +81 -1 (5 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +No index found with given index pattern should throw IndexNotFoundException + +### Related Issues +Resolves #4342 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4368: Avoid unnecessary security plugin download in integ-test + +**URL:** https://github.com/opensearch-project/sql/pull/4368 + +**Author:** @ykmr1224 + +**Created:** 2025-09-24T06:17:05Z + +**State:** MERGED + +**Merged:** 2025-09-25T00:37:13Z + +**Changes:** +15 -6 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +- Avoid unnecessary security plugin download in integ-test +- Remove confusing output in gradlew task executions. 
+ +Before: (it shows output from `project :integ-test` even though it is `:core:test`) +``` +% ./gradlew :core:test + +> Configure project :integ-test +Security Plugin File Already Exists +Download https://raw.githubusercontent.com/opensearch-project/security/main/bwc-test/src/test/resources/security/esnode.pem +Download https://raw.githubusercontent.com/opensearch-project/security/main/bwc-test/src/test/resources/security/esnode-key.pem +Download https://raw.githubusercontent.com/opensearch-project/security/main/bwc-test/src/test/resources/security/root-ca.pem +Security Plugin File Already Exists +Download https://raw.githubusercontent.com/opensearch-project/security/main/bwc-test/src/test/resources/security/esnode.pem +Download https://raw.githubusercontent.com/opensearch-project/security/main/bwc-test/src/test/resources/security/esnode-key.pem +Download https://raw.githubusercontent.com/opensearch-project/security/main/bwc-test/src/test/resources/security/root-ca.pem +======================================= +OpenSearch Build Hamster says Hello! + Gradle Version : 8.14 + OS Info : Mac OS X 15.6.1 (aarch64) + JDK Version : 21 (Amazon Corretto JDK) + JAVA_HOME : /Users/moritato/.sdkman/candidates/java/21.0.8-amzn + Random Testing Seed : 617D105F7E5EFA0B + Crypto Standard : any-supported +======================================= + +> Task :core:compileTestJava +Note: Some input files use or override a deprecated API. +... +``` + +After: (no output related to `project :integ-test`) +``` +% ./gradlew :core:test +======================================= +OpenSearch Build Hamster says Hello! 
+ Gradle Version : 8.14 + OS Info : Mac OS X 15.6.1 (aarch64) + JDK Version : 21 (Amazon Corretto JDK) + JAVA_HOME : /Users/moritato/.sdkman/candidates/java/21.0.8-amzn + Random Testing Seed : 9CDFA2D1386FFC7B + Crypto Standard : any-supported +======================================= + +> Task :core:compileJava +/Volumes/workplace/sql/core/src/main/java/org/opensearch/sql/executor/QueryService.java:94: warning: [removal] AccessController in java.security has been deprecated and marked for removal +... +``` + + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `integ-test/build.gradle:258` + + +LGTM -- Since we're modifying it already, can we fix this typo? + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4367: Fix timezone dependent test failures + +**URL:** https://github.com/opensearch-project/sql/pull/4367 + +**Author:** @ykmr1224 + +**Created:** 2025-09-24T05:50:21Z + +**State:** MERGED + +**Merged:** 2025-09-25T00:36:47Z + +**Changes:** +91 -52 (8 files) + +**Labels:** `maintenance`, `backport 2.19-dev` + + +## Description + +### Description +- Fix timezone dependent test failures. +- Those tests were failing while local date is different between UTC and local timezone. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +Thanks for this! This tripped me up a ton when I first saw it + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Is backpacking 2.19-dev required? @ykmr1224 + + +### @Swiddis + + +> Is backpacking 2.19-dev required? + +Only if you work in PST 😢 + + +--- + +# PR #4360: Fix `ClassCastException` for value-storing aggregates on nested PPL fields + +**URL:** https://github.com/opensearch-project/sql/pull/4360 + +**Author:** @ahkcs + +**Created:** 2025-09-23T20:34:09Z + +**State:** MERGED + +**Merged:** 2025-09-26T06:50:33Z + +**Changes:** +332 -1 (6 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +## Summary + +Value-storing aggregate functions (`first`, `last`, `min`, `max`) failed with a `ClassCastException` when targeting deeply nested field paths such as `resource.attributes.telemetry.sdk.language`. +This PR fixes nested-field value extraction in OpenSearch SQL by handling `Map`/`HashMap` objects that represent nested structures, eliminating the unsafe cast and restoring expected behavior for value-storing aggregates. 
+ +--- + +## Problem + +These queries previously failed with `ClassCastException` for value-storing aggregates: + +```spl +source=logs-otel-v1* | stats first(`resource.attributes.telemetry.sdk.language`) by severityNumber +source=logs-otel-v1* | stats last(`resource.attributes.telemetry.sdk.language`) by severityNumber +source=logs-otel-v1* | stats min(`resource.attributes.telemetry.sdk.name`) by severityNumber +source=logs-otel-v1* | stats max(`resource.attributes.telemetry.sdk.name`) by severityNumber +``` + +While simple fields worked: + +```spl +source=logs-otel-v1* | stats first(severityNumber) +``` + +> Note: Calculation-only aggregates like `count()` and `dc()` were unaffected because they don’t store field values. + +Observed error: + +```json +{ + "error": { + "reason": "There was internal problem at backend", + "details": "class java.util.HashMap cannot be cast to class java.lang.String", + "type": "ClassCastException" + } +} +``` + +--- + +## Root Cause + +Execution-path analysis showed the failure at the final type-conversion layer in value-storing aggregates: + +1. **Field resolution** returned partial `Map` structures (e.g., `{attributes={telemetry={sdk={language=java}}}}`) instead of the terminal primitive (`"java"`). +2. **Aggregates** (`first`, `last`, `min`, `max`) stored these `Map` objects in their accumulators. +3. **Type conversion** later attempted `(String) value` in `ObjectContent.stringValue()` where `value` was a `Map`. + +Why calculation-only aggregates work: functions like `count()` and `dc()` perform calculations without storing the actual field values, so they never hit this conversion path. + +--- + +## Solution + +Implement nested-map handling in `ObjectContent.stringValue()` so value-storing aggregate outputs can be safely converted to strings. + +**Key changes** + +* Detect `Map` values and recursively extract the terminal primitive via `extractFinalPrimitiveValue(...)`. 
+* Preserve existing behavior for non-`Map` values (backward compatible). + +**Before** + +```java +return (String) value; // throws ClassCastException if value is a Map +``` + +**After** + +```java +if (value instanceof Map) { + Map map = (Map) value; + Object finalValue = extractFinalPrimitiveValue(map); // drills down single-value maps + if (finalValue != null && !(finalValue instanceof Map)) { + return finalValue.toString(); + } + return map.toString(); +} +return (String) value; +``` + +**Example extraction** +Input: `{attributes={telemetry={sdk={language=java}}}}` +→ `{telemetry={sdk={language=java}}}` +→ `{sdk={language=java}}` +→ `{language=java}` +→ `"java"` + +--- + +## Verification (now succeeds) + +```spl +source=logs-otel-v1* | stats first(`resource.attributes.telemetry.sdk.language`) by severityNumber +source=logs-otel-v1* | stats last(`resource.attributes.telemetry.sdk.language`) by severityNumber +source=logs-otel-v1* | stats min(`resource.attributes.telemetry.sdk.language`) by severityNumber +source=logs-otel-v1* | stats max(`resource.attributes.telemetry.sdk.language`) by severityNumber +``` + +## Resolves + +* Resolves #4359 + + + +## Reviews + + +### @LantaoJin - APPROVED + + +LGTM if the CI rerun passed. + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1356` + + +What if the type of nested `language` is an integer, can the ppl work? + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1356` + + +Yes, in ObjectContent.java, first the `extractFinalPrimitiveValue` method will extract the value as Object and then we'll add toString() to it. 
+ + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1356` + + +No I mean what if the mapping contains +``` + "properties": { + "version": { + "type": "integer" + } +``` +and query with `` | stats min(`resource.attributes.telemetry.sdk.version`) ``. +Can you update the mapping and data, and test above query? + + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1356` + + +Changed the approach - moved the fix from ObjectContent.java to OpenSearchExprValueFactory.construct() for better universal handling. Updated the telemetry mapping to test for types like integer and boolean + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java:547` + + +Is there risk of a RecursionError here? + +If this is only loaded from an index, it's maybe fine (I think you can't index massively nested structures, not sure). But if there's a path where this is executed on user input, it'd be very easy to cause a crash with this. + + + +## General Comments + + +### @LantaoJin + + +@ahkcs please manually backport it, thanks + + +--- + +# PR #4353: Enhance the cost computing mechanism and push down context + +**URL:** https://github.com/opensearch-project/sql/pull/4353 + +**Author:** @qianheng-aws + +**Created:** 2025-09-23T09:44:08Z + +**State:** MERGED + +**Merged:** 2025-09-28T03:29:17Z + +**Changes:** +2342 -1232 (171 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +This PR includes changes: +1. Support cacheing OpenSearchRequestBuilder inner PushContext. Otherwise it will be built twice for the cheapest plan. +2. Ehance the cost computing mechanism by: + - Split method `computeSelfCost` from `estimateRowCount`. 
In `computeSelfCost`, we've implemented more proper cost computing mechanism rather than relying on rows count only. + - Support more accurate script cost by involving script count rather than the status of whether containing scripts. +3. Move part of our explain IT from json format to yaml format. + +The new cost computing mechanism has advantages in several cases: +- The cost among scans with the same OpenSearch request will get equivilant cost. For example, before this PR, `Scan(PushDownContext[LIMIT, PROJECT])` will get smaller cost than `Scan(PushDownContext[PROJECT, LIMIT])` although they produce the same request. With this PR, they two will get the equivilent cost. And then the optimizer will finally choose the plan with push down sequence more close to the original logical plan. +- Before this PR, the optimizer will choose plan like `Scan(PushDownContext[AGG, PROJECT]) - Aggregate` although the 2 agg could be merged. With this PR, the optimizer will choose the scan with a merged agg pushed down. +- Before this PR, the cases like `https://github.com/opensearch-project/sql/pull/4279#discussion_r2354020132` with several scripts in our agg builder, the optimizer will still choose it since scirpt count won't affect the cost of the plan. This PR will address it. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4312 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/utils/YamlFormatter.java:30` + + +Remove this config, otherwise for the plan of v2 which contains multi-children for one level, it will put the children at beginning and the operator name at the end + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/PushDownContext.java:None` + + +add java doc + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:133` + + +please add a UT for these two method to ensure the behaviour would be stable. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/PushDownContext.java:None` + + +sync offline, let' move this method out + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:146` + + +what the diff with https://github.com/opensearch-project/sql/pull/4353/files#diff-70fd6c948cefa04f6fa28ddd2ccf0e85d6cbce9fa8153b995628e186b1bd918dR109? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:164` + + +could be negative with `-1`? + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +Will `Math.pow(1.1, filterDigest.scriptCount() - 1)` be better? 
It ensures that when there is only one script, the cost of pushed down script filter is smaller than that the same filter in memory ( `dRow * guessedFactor * estimateRowCountFactor < dRow * guessedFactor`, otherwise it's something like `dRow * guessedFactor * 1.1 * estimateRowCountFactor`, which is not necessarily smaller than `dRow * guessedFactor`) + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +The default value of `estimateRowCountFactor` is 0.9, so the cost of pushed-down filter is smaller. + +On the other hand, the biggest reduction of cost from pushed-down filter is not from the cost reduction on filter iteself, but the final output rows count of the scan with filter push down. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:164` + + +It could be but `VolcanoPlanner` in Clacite will replace it with `TINY` cost it's less than 0. + + +## General Comments + + +### @qianheng-aws + + +@LantaoJin There are some enhancement on plan with this PR: +explain_agg_with_sum_enhancement.yaml +explain_timechart_count.yaml +explain_filter_then_limit_push.yaml +explain_agg_counts_by1 +explain_agg_counts_by2 +explain_filter_push.yaml +explain_filter_push_compare_date_string.yaml +explain_filter_push_compare_time_string.yaml +explain_filter_push_compare_timestamp_string.yaml + +And https://github.com/opensearch-project/sql/pull/4377 is no longer needed after this PR. I've removed the code and keep the yaml test only. + + +--- + +# PR #4352: change Anonymizer to mask PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4352 + +**Author:** @xinyual + +**Created:** 2025-09-23T05:14:19Z + +**State:** MERGED + +**Merged:** 2025-09-24T06:50:16Z + +**Changes:** +209 -159 (2 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +The PR change Anonymizer to mask PPL as SQL. 
+Basically, we follow the rules: +1. Mask table name to `table` +2. Mask the column name to `identifier` +3. Mask customer input literal to `***` like regex +4. All the rename after as would be replaced to `identifier` +5. The new column create by eval will be masked to `identifier` like `eval identifer=....` + +Some examples, +Example1 +`source=my_table as alias_table_name | rename my_column as alias_column | fields alias_column` -> `source=table as identifier | rename identifier as identifier | fields identifier` +Exampl2 +`source=table1 as t1 | right join max=0 on t1.id = t2.id table2 as t2 | fields t1.id` -> `source=table as identifier | right join max=*** left = identifier right = identifier on identifier = identifier table as identifier | fields + identifier` + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] +#4125 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - APPROVED + + +Hi @xinyual , thanks for the change. And it is LGTM! + + +## Review Comments + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:667` + + +nit: just being curious, why we need to also mask off the `max_match`? This should just be an integer of how many matches a rex command's regex pattern can be matched. 
(but yes im ok to mask off this too) + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4345: Implement one-batch lookahead for index enumerators + +**URL:** https://github.com/opensearch-project/sql/pull/4345 + +**Author:** @Swiddis + +**Created:** 2025-09-22T05:38:19Z + +**State:** MERGED + +**Merged:** 2025-12-01T18:46:01Z + +**Changes:** +453 -75 (20 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +In local benchmarking of merge operations, I saw we were spending a lot of time waiting for synchronous fetching of batches across both indices. + +Because of the PIT-based design, we can't parallelize page fetches directly, but one low-hanging fruit here is to start fetching the next batch as soon as we get the current one, so by the time we start the next batch it'll already be halfway ready. This cuts enumerated merge times by ~40%. + +To implement this safely, this PR needs to do a few things: +- Register a new thread pool that has authentication context (we can't run background threads if we don't do this) + - See `SQLPlugin.java` changes. I also fixed our thread configuration settings. + - We need a new pool as we'll hang the worker pool if there's only one thread. +- Safely handle whether we have a NodeClient or not within the Calcite enumeration inner loop + - This was the interface change in `OpenSearchClient.java`, I did several plumbing changes around that update. +- Actually implement the background scanner, with a fallback to synchronous scanning if we're missing node context. `BackgroundSearchScanner.java` + +Some alternatives for the long-term: +- Implement range-based/adaptive parallel fetching +- Skip paginating with the rest client and just go directly through Lucene +- Core is working on streaming queries: https://github.com/opensearch-project/OpenSearch/issues/18725 + +In draft pending testing. 
+ +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:324` + + +This previous null prefix meant that we were literally registering the global YML setting `null` to configure the SQL thread pool: + +```yml +null: + size: 30 +``` + +I updated this to use the proper thread_pool setting prefix with accurate naming. This also meant tweaking the thread pool to be named with an underscore, which should be a safe change. New settings are shown in the docs change. + + +### @vamsimanohar on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:324` + + +unfortunate. + + +### @vamsimanohar on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:326` + + +I am not sure if this is the best number. + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java:53` + + +This shows that we have good number of use cases dependent on NodeClient. + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java:99` + + +I am surprised this is working without copying the thread context like this: https://github.com/opensearch-project/sql/blob/main/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java#L39 + + +### @vamsimanohar on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:329` + + +How would it be different if we use the same thread pool or increase the size of the same pool and submit the task to same. 
+ + +Threads from all the pools on the node will take turns for the same cores available. + +`Now double the number of threads from the sql plugin will be contending for the cpu.` Just food for thought: is it good or bad? + +Now that PPL is becoming front and center in discover I don't mind doing this. + + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java:99` + + +What am I missing? Why do we need context copy in other file. Can you print the thread context in this task and see if it has user credentials in case of FGAC? + +PPLPermissionsIT can we add a join test here. + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java:99` + + +This isn't just used for joins, every query goes through this interface. I realized while benchmarking that there were no queries that weren't hitting the pool. That the security ITs pass means either this works or we don't have security ITs. + +I believe it works because we supply the cluster settings during the construction of the executor, so it's built-in to the thread context (as opposed to starting a fresh thread with no executor) + + +### @Swiddis on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:329` + + +The background threads have very light work, it's just serialization costs. They'll take virtually no resources compared to the search and worker threads. Modern threads are cheap so they'll pretty much just sit around in the process table waiting for the network response event. + + +### @Swiddis on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:326` + + +Ideally it should match the number of search threads since that's where all the requests go, maybe I can find where that number is stored and do a lookup. 
+ + +### @Swiddis on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:326` + + +Updated to pull the search thread pool count if available, otherwise fallback to node processors. This is what it looks like if you limit the search thread pool under heavy load: + +image + +Intuitively this seems like a pretty informative view of what state the cluster's in regarding SQL queries. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java:None` + + +[SQL has metrics](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-cloudwatchmetrics.html#managedomains-cloudwatchmetrics-sql) depend on sql-worker. Could u double confirm name change with not break the metrics? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:78` + + +This constructor only been called once per query? I found comments, Could u double confirm? +``` + /** + * This Enumerator may be iterated for multiple times, so we need to create opensearch request for + * each time to avoid reusing source builder. That's because the source builder has stats like PIT + * or SearchAfter recorded during previous search. + */ + @Override + public Enumerable<@Nullable Object> scan() { + return new AbstractEnumerable<>() { + @Override + public Enumerator enumerator() { + OpenSearchRequestBuilder requestBuilder = getOrCreateRequestBuilder(); + return new OpenSearchIndexEnumerator( + osIndex.getClient(), + getFieldPath(), + requestBuilder.getMaxResponseSize(), + requestBuilder.getMaxResultWindow(), + osIndex.buildRequest(requestBuilder), + osIndex.createOpenSearchResourceMonitor()); + } + }; + } +``` + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java:144` + + +what if fixedThreadPool full, should fallback to sync? 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:None` + + +Why use client.search instead of bgScanner.fetchNextBatch in this case? + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java:None` + + +Oh, that's annoying... This thread pool is as far as I know the only one in OpenSearch to use a hyphen instead of an underscore :/ + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:78` + + +It's the same as current behavior, right? If you recreate the enumerator with a new client, you erase all of its current state and start a new search. In that snippet it looks like this is deliberately meant to restart the search multiple times + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java:144` + + +We just buffer: https://github.com/opensearch-project/sql/pull/4345#discussion_r2383251023 + +If we fallback to sync, it eliminates the utility of being able to directly view/control the active SQL network requests via the BG thread pool + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:None` + + +Since it's the same client in both instances, it doesn't make a difference (fetchNextBatch would force a sync fetch). I can change it for consistency + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:78` + + +The concerns is, If scan() metheod is been called multiple times in planning stage, it will invoke startScanning multiple times. + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:78` + + +Woah, I wouldn't have expected planning to make a call to scan, seems weird... 
Can try to find a better way to handle that, but scan intuitively to me means "actually start scanning something" + + +## General Comments + + +### @Swiddis + + +Security IT failures are confusing me here -- seems like they're all consistently failing but the changed code doesn't show up anywhere in any of the stack traces + + +### @Swiddis + + +Some additional testing info: + +I took 5 million records from the big5 benchmarking dataset and compared the current `mainline` with this. + +First, as sanity, the results are the same for one of the queries requiring a full index enumeration: + +``` +source = big5 +| eval range_bucket = case( + `metrics.size` < -10, 'range_1', + `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', + `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', + `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', + `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', + `metrics.size` >= 2000, 'range_6') +| stats count() by range_bucket, span(`@timestamp`, 1h) as auto_span +| sort + range_bucket, + auto_span + +Current mainline: + +fetched rows / total rows = 48/48 ++---------+---------------------+--------------+ +| count() | auto_span | range_bucket | +|---------+---------------------+--------------| +| 122464 | 2022-12-31 16:00:00 | range_5 | +| 121585 | 2022-12-31 17:00:00 | range_5 | +| 122052 | 2022-12-31 18:00:00 | range_5 | +| 122220 | 2022-12-31 19:00:00 | range_5 | +| 122163 | 2022-12-31 20:00:00 | range_5 | +| 121840 | 2022-12-31 21:00:00 | range_5 | +| 121606 | 2022-12-31 22:00:00 | range_5 | +| 121889 | 2022-12-31 23:00:00 | range_5 | +| 121088 | 2023-01-01 00:00:00 | range_5 | +| 121943 | 2023-01-01 01:00:00 | range_5 | + +After update: + +fetched rows / total rows = 48/48 ++---------+---------------------+--------------+ +| count() | auto_span | range_bucket | +|---------+---------------------+--------------| +| 122464 | 2022-12-31 16:00:00 | range_5 | +| 121585 | 2022-12-31 17:00:00 | range_5 | +| 122052 | 
2022-12-31 18:00:00 | range_5 | +| 122220 | 2022-12-31 19:00:00 | range_5 | +| 122163 | 2022-12-31 20:00:00 | range_5 | +| 121840 | 2022-12-31 21:00:00 | range_5 | +| 121606 | 2022-12-31 22:00:00 | range_5 | +| 121889 | 2022-12-31 23:00:00 | range_5 | +| 121088 | 2023-01-01 00:00:00 | range_5 | +| 121943 | 2023-01-01 01:00:00 | range_5 | +``` + +Second, I wanted to benchmark and check for impact. I already tested with joins and it's ~40% faster, but for non-joins we potentially pay overhead for nothing. + +For the slowest big5 queries (BG fetches on the left, sync fetches on the right), we see slight perf gains: +image + +For the fastest ones, the performance is approximately the same (some minor latency and throughput diffs but I'm not confident that this isn't just random variation): +image + + + +### @vamsimanohar + + +Added few comments. Good one 👍 . + + +### @Swiddis + + +Turns out I flipped the benchmark in my head, so this is overall a regression -- going to put back in draft and figure out a better approach + + +### @LantaoJin + + +> Turns out I flipped the benchmark in my head, so this is overall a regression -- going to put back in draft and figure out a better approach + +@Swiddis , do you mean the current implementation has performance regression? So why the PR merged finally? If there is no regression, please backport it to 2.19-dev since backporting of #4884 is blocked by this backporting. + + +### @Swiddis + + +Couldn't get the regression to reliably repro & the regression was smaller than the gains in more tests, so I wanted to see what the diff was in the OSB benchmarks. I don't see any benchmark diff since merging. 
+ +Wasn't planning on backporting this originally since it's still largely experimental, can open the PR at least + + +--- + +# PR #4344: [Enhancement] Add error handling for known limitation of sql `JOIN` + +**URL:** https://github.com/opensearch-project/sql/pull/4344 + +**Author:** @RyanL1997 + +**Created:** 2025-09-22T00:18:41Z + +**State:** MERGED + +**Merged:** 2025-10-01T02:05:13Z + +**Changes:** +99 -1 (4 files) + +**Labels:** `enhancement`, `SQL`, `v3.3.0` + + +## Description + +Signed-off-by: Jialiang Liang +### Description +Better error handling for known limitation of sql JOIN + +example: +```bash +❯ curl -X POST "localhost:9200/_plugins/_sql" -H 'Content-Type: application/json' -d' + { + "query": "SELECT SUM(a.balance) FROM accounts a JOIN accounts b ON a.account_number = b.account_number" + }' + +{ + "error": { + "reason": "Invalid SQL query", + "details": "JOIN queries do not support aggregations on the joined result. For more information, see https://docs.opensearch.org/3.2/search-plugins/sql/limitation/#join-does-not-support-aggregations-on-the-joined-result", + "type": "SqlParseException" + }, + "status": 400 +}% +``` + +### Related Issues +* Resolve https://github.com/opensearch-project/sql/issues/4058 +* Official doc: https://docs.opensearch.org/latest/search-plugins/sql/limitation/#join-does-not-support-aggregations-on-the-joined-result + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - COMMENTED + + +A few suggestions -- I think this could be designed better if changing `JoinSelect` doesn't cause cascading breakage, otherwise this approach is fine. 
+ + +### @dai-chen - COMMENTED + + +Did you remove your test accidentally? I don't see any UT or IT now. + + +## Review Comments + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java:None` + + +suggestion: Can we fetch the cluster version and use it instead of `latest`? + +If we ever fix this constraint, this error message will be invalid on all the older versions that output it + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java:None` + + +suggestion (if-minor): Should we move a lot of this logic to the construction of the `JoinSelect`? + +Seems weird to construct the object and then use its setters to fill in everything, instead of just passing in the subquery block and letting it figure itself out. This also would guarantee that this validation is applied everywhere that JoinSelect is constructed. + + +### @Swiddis on `legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java:None` + + +todo: This is a hard-coded copy of the validation's private error message + +We should just make it public & reference it like we do with most of the other strings. I've had a few cases now of trying to clarify error messages and getting several pointless test failures for it + + +### @Swiddis on `legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java:None` + + +praise: Nice idea to make the exception expectation a shared function + +I probably would have done a bunch of `thrown.expect` && `thrown.expectMessage(ERROR_MSG)` copies, this is nicer + + +### @Swiddis on `legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java:None` + + +suggestion: Since we're just leaning on `Select.AGGREGATE_FUNCTIONS`, we could reduce all these aggregation cases to a single [table-driven test](https://go.dev/wiki/TableDrivenTests) + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java:None` + + +good idea. 
implemented + + +### @RyanL1997 on `legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java:None` + + +I've extract it into the util class with the above dynamic versioning doc link url func. + + +### @RyanL1997 on `legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java:None` + + +good point, and I was actually thinking the same. Fixed. + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java:None` + + +good point. applied the change + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java:None` + + +I think this new validation should be part of `AntlrSqlParseTreeVisitor`. To simplify, when we visit group by, we can confirm if any Join? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java:None` + + +I'm just thinking if we don't support any post-processing after Join operation, should I validate in the opposite way, like above Join operator, the plan tree can only include Project, Limit etc? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/utils/Util.java:None` + + +Not sure if any way we can make sure this won't be broken. + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/utils/Util.java:None` + + ++1 to the concern. However, I'd say to avoid these links in general we actually need some proper link checker for our docs. I know that OSD has some similar framework. - which is kinda off the scope of this task. + +The good thing is that I have implemented the dynamic versioning of the doc link (instead of using the `latest` I was using the current version of OS, and I think for the released version's doc, highly likely we will not change the structure of the url. + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/utils/Util.java:None` + + +If we want to be extra safe we can just remove the `#` anchor and link straight to limitations. 
The anchor is coupled to the section title and seems likely to change, the overall `limitation` link is probably stable. + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java:None` + + +I found the following: + +1. Two Parser Systems: + - ANTLR Parser: `AntlrSqlParseTreeVisitor` - Used for semantic analysis, type checking + - Druid Parser: `SqlParser.java` - Used for actual query execution (legacy) +2. `JOIN` Processing Flow: + - OpenSearchActionFactory.createAction() determines if it's a `JOIN` query + - If `JOIN`: Uses `Druid` parser → `SqlParser.parseJoinSelect()` → Our current validation + - If not `JOIN`: Uses regular select parsing +3. Current JOIN Validation: + - Happens in Druid-based `SqlParser` during `parseJoinSelect()` + - The current implementation - validation is in `JoinSelect.withValidation()` + +So the ANTLR visitor approach would be for different use cases (like type checking, semantic analysis), but `JOIN` execution uses the Druid parser. But yes, I can try to switch to the Antlr parsing approach and trying to remove the current approach. SInce the the issue will be catched earlier before it reaches to the parser we are working on. + + + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java:None` + + +fixed + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java:None` + + +fixed and moved the checking logic to the antlr visitor layer + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/utils/Util.java:None` + + +fixed + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitor.java:None` + + +I think the current approach of detecting JOINs is unnecessarily complex. Instead, we can simply do it by a small visitor as below? 
+ +``` + @Override + public T visitGroupByItem(OpenSearchLegacySqlParser.GroupByItemContext ctx) { + // Get the parent context (typically the FROM clause area) + ParserRuleContext fromClause = ctx.getParent(); + + // Check for JOINs using a focused visitor + boolean hasJoin = fromClause.accept(new OpenSearchLegacySqlParserBaseVisitor() { + @Override + public Boolean visitTableSourceBase(TableSourceBaseContext ctx) { + return !ctx.joinPart().isEmpty(); // Found a JOIN if joinPart is not empty + } + + @Override + protected Boolean defaultResult() { + return false; + } + + @Override + protected Boolean aggregateResult(Boolean aggregate, Boolean nextResult) { + return aggregate || nextResult; + } + }); +``` + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitor.java:None` + + +good call. fixed. can you take a look again? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitor.java:None` + + +what's this trying to check? + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitor.java:None` + + +transferring some offline communication with @dai-chen here: we should limit the scope of this for only check the explicit join only - so removed this section. 
+ + +## General Comments + + +### @RyanL1997 + + +Added the new tests in `AntlrSqlParseTreeVisitorTest.java` cc @dai-chen + + +### @RyanL1997 + + +#4378 is failing at the same + + +### @Swiddis + + +Failures exist in main and are only with recently added tests, think this is ok to merge + + +--- + +# PR #4337: Optimize count aggregation performance by utilizing native doc_count in v3 + +**URL:** https://github.com/opensearch-project/sql/pull/4337 + +**Author:** @LantaoJin + +**Created:** 2025-09-19T06:58:13Z + +**State:** MERGED + +**Merged:** 2025-09-24T07:28:18Z + +**Changes:** +890 -69 (59 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +For aggregation with the `count` aggregator, we can skip registering a `ValueCountAggregationBuilder` and instead read the `doc_count` directly from `CompositeAggregation.Bucket.getDocCount()` in `CompositeAggregationParser`. + +Additionally, add `isNotNull(FIELD)` for +``` +source=t | stats count(a) +``` +``` + // Before: Aggregate(count(a)) + // \- Project(a) + // \- Scan t + // After: Aggregate(count(a)) + // \- Project(a) + // \- Filter(isNotNull(a)) + // \- Scan t +``` + + + +Note: to minimize the changes, this optimization only takes effect in v3. But it's very easy to migrate to v2. + +### Related Issues +Resolves #4265 and resolves #4347 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
+ + + +## Reviews + + +### @yuancu - DISMISSED + + +LGTM + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Based on this idea, shall we apply similar optimization to non bucket count aggregation as well? We can fill the count value with the hits doc count. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java:57` + + +[question] Is the doc_count equal to the actual `count(FIELD)` metrics under all cases? + +My main concern is count aggregation shouldn't count null value while doc count may not. To address this gap, we should add a filter like exist to filter the null value documents + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Sure, will update it + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java:57` + + +sure, we need to add a `"query":{"exists":{"field":"FIELD"}}` in DSL for `count(FIELD)` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +It seems that the `ValueCountAggregationBuilder` is removed in place. Is it returned and re-assigned to `newMetricBuilder` for the sake of clarity? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexScanRule.java:48` + + +fix https://github.com/opensearch-project/sql/issues/4347 + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java:57` + + +We cannot pushdown `doc_count` for `count(FIELD) by` case actually, reverted partial code. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +No, `metricBuilder` is "immutable", I removed `ValueCountAggregationBuilder`s in its' copy. + + +### @penghuo on `integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push1.yaml:7` + + +2147483647 is hard limit? index could has more than 2B docs. + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push1.yaml:7` + + +Not hard code, the `track_total_hits = true` set it to Integer.MAX. + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push1.yaml:7` + + +https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/action/search/SearchPhaseController.java#L858-L877 + +Seems the `Integer.MAX` only indicates whether total hit count for the query should be tracked ACCURATE, rather than a limitation. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:43` + + +comments is exactally same, any difference, + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java:43` + + +oh, just copy the comment. The maxResultWindow is from index.max_result_window. `maxResponseSize` is never used in v3 actually. The value is +``` + public int getMaxResponseSize() { + return pageSize == null ? requestedTotalSize : pageSize; + } +``` + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:848` + + +[non-blocking] We'd do this optimization in a separaete rule instead of here. Otherwise it will affect the basic logical plan by adding a redundant filter. We can do this change as follow-up + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java:100` + + +In which case will `hits.getTotalHits()` return null? 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java:100` + + +When the the tracking of total hits is disabled in the request (`track_total_hits=false` or ignore). + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:848` + + +https://github.com/opensearch-project/sql/issues/4390 opened + + +## General Comments + + +### @LantaoJin + + +## Benchmark1: `stats | stats count(UserAgent) by SearchPhrase` + +### Original Composite Bucket + ValueCount: 4800ms +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "groupkey1": { + "terms": { + "field": "SearchPhrase", + "order": "asc" + } + } + } + ] + }, + "aggs": { + "cnt": { + "value_count": { + "field": "UserAgent" + } + } + } + } + } +}' +``` + +### Option1: Doc_Count + Bucket Filter: 5100ms +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "groupkey1": { + "terms": { + "field": "SearchPhrase", + "order": "asc" + } + } + } + ] + }, + "aggs": { + "tmin": { + "filter": { + "exists": { + "field": "UserAgent" + } + } + } + } + } + } +}' +``` + +### ~~Option2: Doc_Count + Query Filter: 3900ms~~ (Cannot add query filter with group-by) +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "query": { + "exists": { + "field": "UserAgent" + } + }, + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "groupkey1": { + "terms": { + "field": "SearchPhrase", + "order": "asc" + } + } + } + ] + } + } + } +}' +``` + +### Baseline: 
Doc_Count only (`| stats count() by SearchPhrase`): 3000ms +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "groupkey1": { + "terms": { + "field": "SearchPhrase", + "order": "asc" + } + } + } + ] + } + } + } +}' +``` + +## Benchmark2:`stats | stats count(UserAgent) by span(EventTime, 1h)` + +### Original Composite Bucket + ValueCount: 4500ms +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "span1": { + "date_histogram": { + "field": "EventTime", + "order": "asc", + "fixed_interval": "1d" + } + } + } + ] + }, + "aggs": { + "cnt": { + "value_count": { + "field": "UserAgent" + } + } + } + } + } +}' +``` +### Option1: Doc_Count + Bucket Filter: 4800ms +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "span1": { + "date_histogram": { + "field": "EventTime", + "order": "asc", + "fixed_interval": "1d" + } + } + } + ] + }, + "aggs": { + "cnt": { + "value_count": { + "field": "UserAgent" + } + } + } + } + } +}' +``` + +### ~~Option2: Doc_Count + Query Filter: 35ms~~ (Cannot add query filter with group-by) +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "query": { + "exists": { + "field": "UserAgent" + } + }, + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "span1": { + "date_histogram": { + "field": "EventTime", + "order": "asc", + "fixed_interval": "1h" + } + } + } 
+ ] + } + } + } +}' +``` + +### Baseline: Doc_Count only (`| stats count() by span(EventTime, 1h)`): 33ms +``` +curl -XGET "http://localhost:9200/hits/_search" -H 'Content-Type: application/json' -d' +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "span1": { + "date_histogram": { + "field": "EventTime", + "order": "asc", + "fixed_interval": "1d" + } + } + } + ] + } + } + } +}' +``` + +## Conclusion +- In both benchmark1 and benchmark2, Option1 `Doc_Count + Bucket Filter` is not worth doing. +- ~~In benchmark1, Option2 `Doc_Count + Query Filter` has only 1.23x improvement (3900ms vs 4800ms)~~ +- ~~In benchmark2, Option2 `Doc_Count + Query Filter` has 128x improvement (35ms vs 4500ms)~~ +- We cannot convert to `Doc_Count + Query Filter` in bucket aggregations. Only `Doc_Count only ` works. + + +### @LantaoJin + + +@qianheng-aws @yuancu @noCharger @penghuo can you review this PR? all optimizations completed, includes: + +### 1. pushdown `count() by keys` to `doc_count` +### 2. pushdown `count()` to `hits.total.value` +### 3. pushdown `count(FIELD)` to `FIELD_exists_query` + `hits.total.value` +### 4. fix bug of pushdown not work with two equivalent `count(FIELD)` +### 5. optimization of fetching only one batch for aggregation request.
+ + +--- + +# PR #4336: [Backport 2.19-dev] Date/Time based Span aggregation should always not present null bucket (#4327) + +**URL:** https://github.com/opensearch-project/sql/pull/4336 + +**Author:** @LantaoJin + +**Created:** 2025-09-19T05:48:10Z + +**State:** MERGED + +**Merged:** 2025-09-24T07:02:17Z + +**Changes:** +559 -237 (32 files) + + +## Description + +(cherry picked from #4327 commit 2ab5002bac5f9b1ac680412488eeb7e21468147b) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4334: Fix alphanumeric search which starts with number + +**URL:** https://github.com/opensearch-project/sql/pull/4334 + +**Author:** @vamsimanohar + +**Created:** 2025-09-19T01:21:26Z + +**State:** MERGED + +**Merged:** 2025-09-29T23:12:00Z + +**Changes:** +197 -188 (17 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `v3.3.0` + +**Assignees:** @vamsimanohar + + +## Description + +### Description + +unquoted search literals which starts with numbers translate to wrong lucene query due to issue with ppl grammar. + + +### Example Query +``` +POST _plugins/_ppl/ +{ + "query": "source =demo-logs-otel-v1-* 5a57f0a17fc6f59fb2ad8ec6b52ea3fa " +} +``` +Truncated Explain Result: +``` +"query_string":{"query":"(5) AND (a57f0a17fc6f59fb2ad8ec6b52ea3fa)"} +``` + +We fixed the issue by introducing proper grammar and as part of the fix, spanlength is moved to lexer above NUMERICID to avoid spanlengths like '1s 1month' to be parsed as NUMERICID + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java:None` + + +Does `\\\` escape is required in RESTfull API? + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4333: Add max/min eval functions + +**URL:** https://github.com/opensearch-project/sql/pull/4333 + +**Author:** @ritvibhatt + +**Created:** 2025-09-19T00:20:54Z + +**State:** MERGED + +**Merged:** 2025-09-30T18:30:17Z + +**Changes:** +622 -0 (16 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Add support for min/max statistical eval functions, allowing users to find maximum and minimum values among multiple arguments within a single row. + +- Updated parser to add max and min syntax for eval command +- Added MaxFunction and MinFunction classes with Calcite UDF implementation +- Updated PPLBuiltinOperators and PPLFuncImpTable to register the new functions + +Usage Examples + + -- Returns the larger value between age field and 30 for each row + ``` source=accounts | eval max_age = MAX(age, 30) | fields age, max_age``` + + -- Returns either 'John' or value in firstname depending on what is larger lexicographically + ```source=accounts | eval result = MAX(age, 'John', firstname) | fields age, firstname, result``` + + -- Returns either the value in the age field or 35 + ``` source=accounts | eval result = MIN(age, 35, firstname) | fields age, firstname, result``` +### Related Issues +Resolves #4341 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +Hi @ritvibhatt, thanks for the change. I just left some comments for having a better understanding of the change. + + +### @dai-chen - COMMENTED + + +A high level QQ: is supporting max(int, string) a hard requirement for now? + +I'm just thinking otherwise: + +1. We probably can reuse the existing `array_min/max` function; +2. The implicit conversion (string -> numeric) can be supported in https://github.com/opensearch-project/sql/issues/4349. cc: @penghuo + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java:None` + + +Do we need input validation in the `compareMax`/`compareMin` methods to handle edge cases: +- What happens if we get incomparable types (like arrays or objects)? <- is this possible? +- nit: Should we throw a more specific exception with a clear message in these cases? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java:None` + + +see the above comment for `compareMax`. + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:621` + + +What should be the behavior with `null` values (will the `null` be ignored?): +```bash +source=accounts | eval result = MAX(age, null, 30) | fields age, result +``` + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:607` + + +What should be the behavior of mixed numeric types (int, long, float, double)? + +e.g. 
+```bash +source=accounts | eval result = MAX(age, 30.5, 28L) | fields age, result +``` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java:None` + + +For arrays/objects it gets converted to a string when comparing so it will be compared lexicographically + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java:None` + + +Same as max, arrays/objects it gets converted to a string when comparing so it will be compared lexicographically + + +### @ritvibhatt on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:621` + + +Just updated so null values will be ignored unless all of the values are null, in which case it will return null. Added tests in ```CalcitePPLEvalMaxMinFunctionIT``` + + +### @ritvibhatt on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:607` + + +Should still be compared numerically and return the largest value from those + + +### @penghuo on `docs/user/ppl/functions/statistical.rst:None` + + +the format looks not correct. https://github.com/ritvibhatt/sql/blob/7203266ac0d716e116e5ea0a9af1ed8902dda008/docs/user/ppl/functions/statistical.rst + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java:None` + + +what is expectataion is max(4, "2")? should be 4, right? + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:607` + + +Maybe relate to the solution of this https://github.com/opensearch-project/sql/issues/4349. Cuz I think 28L should be consider as a string. But yes, it is pointless to compare the string to numeric value from its concept. + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:607` + + +Not a blocker for now. 
+ + +### @dai-chen on `core/src/main/java/org/opensearch/sql/data/utils/MaxTypeComparator.java:None` + + +Is `MinTypeComparator` simply the inverse of `MaxTypeComparator`? I'm thinking of only one comparator and use it in Min/MaxFunction below by Java Stream's min/max(comparator) API. + +Also this may worth adding dedicated UT to show its behavior for num-num, num-string, string-string etc. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java:27` + + +what does this Integer.compare with 0 meaning? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java:36` + + +Could you confirm whether comparison between "1" and "2" also considered as numerical comparison? + + +### @ritvibhatt on `docs/user/ppl/functions/statistical.rst:None` + + +Updated the doc + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java:None` + + +Max(4, "2") should result in "2" since strings are always considered larger than numeric values + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/data/utils/MaxTypeComparator.java:None` + + +Updated and added tests thank you! + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java:27` + + +It will normalize it so if the string comparison returns a negative it will make it -1 and if it returns a positive it will be 1. Don't think that is necessary, can remove and just leave the string comparison + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java:36` + + +No they will be compared as strings so max("9", "21") will return "9" + + +## General Comments + + +### @penghuo + + +> A high level QQ: is supporting max(int, string) a hard requirement for now? +> +> I'm just thinking otherwise: +> +> 1. We probably can reuse the existing `array_min/max` function; +> 2. 
The implicit conversion (string -> numeric) can be supported in #4349. cc: @penghuo + +array_min/max requires all arguments to have the same type. + +max/min requires special handling; implicit cast may not work, e.g. the expectation is +``` +max(20, "4") should return 20 +max(20, "4a") should return 4a +``` +* convert fields to int does not work +``` +array_max(20, 4) should return 20 +array_max(20, null) should return 20 +``` + +* convert fields to string does not work +``` +array_max("20", "4") should return "4" +array_max("20", "4a") should return "4a" +``` + + +### @dai-chen + + +@ritvibhatt I synced with @penghuo offline. For Q2, it doesn’t seem to be a type conversion issue as I originally thought, but rather a data sorting one. For example, we could define a custom comparator in Java to handle the new sorting rule for numeric and string values, and then apply it in min/max/sort APIs. Perhaps we can do something similar within the Calcite accumulator? + + +--- + +# PR #4332: Support `multisearch` command in calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4332 + +**Author:** @ahkcs + +**Created:** 2025-09-18T23:23:36Z + +**State:** MERGED + +**Merged:** 2025-10-02T15:56:28Z + +**Changes:** +1794 -56 (33 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + This PR introduces the multisearch command, a new PPL command that allows combining results from multiple search subsearches into a single unified result set. + + Resolves: #4348 + + **Key Features:** + - Combines results from 2+ search subsearches with different filtering criteria + - **Supports ALL PPL commands** within subsearches (where, eval, fields, stats, sort, dedup, etc.)
+ - Enables timestamp-based result interleaving for time-series data analysis + - Uses efficient UNION ALL + ORDER BY architecture for optimal performance + + **Example Usage:** + ```sql + -- Basic age group analysis + source=accounts | multisearch + [search source=accounts | where age < 30 | eval age_group = "young"] + [search source=accounts | where age >= 30 | eval age_group = "adult"] + | stats count by age_group + + -- Success rate monitoring pattern + source=logs | multisearch + [search source=logs | where status="success" | eval result="success"] + [search source=logs | where status!="success" | eval result="total"] + | stats count(eval(result="success")) as success_count, count() as total_count + +``` + + + +## Reviews + + +### @ykmr1224 - COMMENTED + + +Question: what happens if sub-searchs have different type for the same field name? + + +### @RyanL1997 - COMMENTED + + +Hi @ahkcs , thanks for the change. I just left some comments. + + +## Review Comments + + +### @dai-chen on `docs/user/ppl/cmd/multisearch.rst:None` + + +since 3.0.0? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +reuse `subSearch`? + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:1041` + + +enforce this in grammar if required? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1671` + + +not sure where the complexity comes. I thought each subsearch can be translated by existing visit logic? +I checked the similar visitAppend and it seems even more complex... 
+ + +### @ahkcs on `docs/user/ppl/cmd/multisearch.rst:None` + + +Updated + + +### @ahkcs on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Updated to reuse `subSearch` + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:1041` + + +enforced + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1671` + + +simplified, the visitMultisearch method currently processes each subsearch using existing visitor logic, aligns their schemas for union compatibility, creates a Calcite union of all results, and also checks for timestamp-based ordering. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Why do we need a copy? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +I think this logic can be single path (simply go through each subsearchNodes, collect field name, and decide fieldOrder, then add to projections. ) + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +This should be common among commands. Currently, we should support `@timestamp`. +ref: https://github.com/opensearch-project/sql/issues/4275 + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Let's use `assertYamlEqualsJsonIgnoreId` for readable plan comparison. You can check the test case `supportSearchSargPushDown_singleRange` for an example. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java:None` + + +Let's have a test with different indices to test schema merge. + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java:322` + + +Why do we need custom table implementation? 
+ + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +lets remove these verbose comments + + +### @RyanL1997 on `docs/user/ppl/cmd/multisearch.rst:None` + + +Correct me if im wrong: do we actually need this section for configuration? + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java:322` + + ++1 for Tomo's question + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Consider using the imported `java.util.HashSet` or add proper imports instead of fully qualified names. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +This fallback assumes `VARCHAR` but should consider using a more appropriate type or handle the case where no field type is found more gracefully. Or do we actually need this fallback? + + +### @RyanL1997 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +The grammar enforces minimum two subsearches at parse time, but the error message in tests suggests runtime validation. Consider adding a more descriptive parse error. + + +### @RyanL1997 on `docs/user/ppl/cmd/multisearch.rst:None` + + +The limitations section mentions "Streaming Commands Only" but the implementation and tests show non-streaming commands are now supported. Remove or update this section. + + +### @RyanL1997 on `docs/user/ppl/cmd/multisearch.rst:None` + + +The syntax documentation is inconsistent with the actual grammar. L39 shows optional minimum, but L41 states "At least two search subsearches must be specified." 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Removed copy + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Updated to single path + + +### @ahkcs on `docs/user/ppl/cmd/multisearch.rst:None` + + +Updated + + +### @ahkcs on `docs/user/ppl/cmd/multisearch.rst:None` + + +Updated + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Removed fallback + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Updated + + +### @ahkcs on `docs/user/ppl/cmd/multisearch.rst:None` + + +Removed + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +removed + + +## General Comments + + +### @ahkcs + + +> Question: what happens if sub-searchs have different type for the same field name? + +Currently we'll throw exception, for example: `class java.lang.Integer cannot be cast to class java.math.BigDecimal` +I have added IT tests for this behavior + + +### @penghuo + + +> Question: what happens if sub-searchs have different type for the same field name? + ++1. is it common issue for append and multisearch command? if yes, consider fix it in seperate PR. + + +### @ahkcs + + +> > Question: what happens if sub-searchs have different type for the same field name? +> +> +1. is it common issue for append and multisearch command? if yes, consider fix it in seperate PR. + +Created github issue: https://github.com/opensearch-project/sql/issues/4383 to track schema merging for `append` and `multisearch` command + + +### @RyanL1997 + + +lets resolve the conflict + + +### @dai-chen + + +Do we add and benchmark Big5 query for each new command like this? + + +### @ahkcs + + +> Do we add and benchmark Big5 query for each new command like this? 
+ +I think we can add IT tests to benchmark Big5 queries like this, I'll open a separate PR for this: +https://github.com/opensearch-project/sql/pull/4163 + + +### @ahkcs + + +> Do we add and benchmark Big5 query for each new command like this? + +For `Multisearch` command, I think the result will be exactly the same as `Append` command as they share the same implementation + + +--- + +# PR #4329: Push down stats with bins on time field into auto_date_histogram + +**URL:** https://github.com/opensearch-project/sql/pull/4329 + +**Author:** @qianheng-aws + +**Created:** 2025-09-18T11:08:22Z + +**State:** MERGED + +**Merged:** 2025-09-24T12:23:50Z + +**Changes:** +229 -11 (11 files) + +**Labels:** `bug`, `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Push down stats with bins on time field into auto_date_histogram + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4210, +Paritally resolve https://github.com/opensearch-project/sql/issues/4317 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +conflicts now + + + +### @LantaoJin + + +@qianheng-aws please backport it to dev manually. 
+ + +--- + +# PR #4328: [Backport 2.19-dev] [Doc] Correct the comparision table for rex doc + +**URL:** https://github.com/opensearch-project/sql/pull/4328 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-18T03:57:57Z + +**State:** MERGED + +**Merged:** 2025-09-18T17:32:10Z + +**Changes:** +1 -1 (1 files) + + +## Description + +Backport 37f024db3b2796b059e8d8447fe16db41b3ac819 from #4321. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4327: Date/Time based Span aggregation should always not present null bucket + +**URL:** https://github.com/opensearch-project/sql/pull/4327 + +**Author:** @LantaoJin + +**Created:** 2025-09-18T02:50:07Z + +**State:** MERGED + +**Merged:** 2025-09-19T04:33:53Z + +**Changes:** +552 -230 (32 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +``` +| stats agg_func() by span(time, interval) +``` +time-based span aggregation should **always** not present null bucket: +- composite aggregation with `DateHistogramValuesSourceBuilder.missingBucket(false)` + +#### Example Data +| Name | DEPTNO | birthday | +|-------|-----|-----------| +| Alice | 1 | 2024-04-21 | +| Bob | 2 | 2025-08-21 | +| Jeff | null | 2025-04-22 | +| Adam | 2 | null | + +#### Example Query 1 +``` +| stats count() by span(birthday, 1y) as year +``` + +Return +| count() | year | +|-------|-----| +| 1 | 2024-01-01 | +| 2 | 2025-01-01 | + +#### Example Query 2 +``` +| stats count() by span(birthday, 1y) as year, DEPTNO +``` + +Return +| count() | year | DEPTNO | +|-------|-----|-----| +| 1 | 2024-01-01 | 1 | +| 1 | 2025-01-01 | 2 | +| 1 | 2025-01-01 | null | + +#### Example Query 3 +``` +| stats bucket_nullable=false count() by span(birthday, 1y) as year, DEPTNO +``` + +Return +| count() | year | DEPTNO | 
+|-------|-----|-----| +| 1 | 2024-01-01 | 1 | +| 1 | 2025-01-01 | 2 | + +### Related Issues +Resolves #4318 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4326: [BackportPR 2.19-dev] Fix geopoint issue in complex data types (#4325) + +**URL:** https://github.com/opensearch-project/sql/pull/4326 + +**Author:** @vamsimanohar + +**Created:** 2025-09-18T01:44:09Z + +**State:** MERGED + +**Merged:** 2025-09-18T02:29:13Z + +**Changes:** +295 -60 (8 files) + + +## Description + +(cherry picked from commit d527bbd93cef38b0b92ce80e2e86d0a4051997e8) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4325: Fix geopoint issue in complex data types + +**URL:** https://github.com/opensearch-project/sql/pull/4325 + +**Author:** @vamsimanohar + +**Created:** 2025-09-18T01:02:52Z + +**State:** MERGED + +**Merged:** 2025-09-18T01:37:03Z + +**Changes:** +294 -15 (8 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @vamsimanohar + + +## Description + +### Related Issues +Partially Resolves #4324 + +Also, removed CalciteGeoPointFormatsIT.testReadingGeoHash as we have resolved the https://github.com/opensearch-project/sql/pull/3445 + +Currently, it handles only map case and we need to follow up with array and other complex scenarios. + + + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +yamlTest is much cleaner. +https://github.com/opensearch-project/sql/tree/main/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/JdbcOpenSearchDataTypeConvertor.java:162` + + +if the problem is we didn't cover the `geo_point` nested in a struct, why we need this? shouldn't be handled by https://github.com/opensearch-project/sql/pull/4325/files#diff-979074bb911fcfdb6b2eb25ab8b32da65cc8aa12b43b40b5880b876596f9c8d8R80? 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/JdbcOpenSearchDataTypeConvertor.java:162` + + +That place can only handle the geo_point type in the 1st level of our mapping/schema. And it has special logic for handling the transformation that Avatica convert `point` into `string`. + +For `geo_point` nested in a struct, we need do recursive parsing as it may be multiple-level nested. And when being nested in a struct, seems Avatica won't do that converting. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/JdbcOpenSearchDataTypeConvertor.java:162` + + +> For geo_point nested in a struct, we need do recursive parsing as it may be multiple-level nested. + +L164-L171 is the logic of recursive parsing. question is why we need handle Point in L159-L162? can these lines be deleted? @qianheng-aws + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/util/JdbcOpenSearchDataTypeConvertor.java:162` + + +I see, the `getExprValueFromSqlType` cannot handle nested type. So the patch recursively resolves the type in `processValue` rather than `getExprValueFromSqlType`. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4321: [Doc] Correct the comparision table for rex doc + +**URL:** https://github.com/opensearch-project/sql/pull/4321 + +**Author:** @RyanL1997 + +**Created:** 2025-09-17T17:58:00Z + +**State:** MERGED + +**Merged:** 2025-09-18T03:57:43Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `documentation`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Correct the comparision table for rex doc +- Correct the wrong comparison result between `rex` and `parse` in `rex.rst` + - Both `rex` and `parse` should only behave extraction instead of filtering. 
+- Add highlighted diff to mention `rex` supports multiple name group extractions + +### Related Issues +* Relate #4109 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +Remove this, if both command does not support it. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +good point. Fixed + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4320: Spotless precommit: apply instead of check + +**URL:** https://github.com/opensearch-project/sql/pull/4320 + +**Author:** @Swiddis + +**Created:** 2025-09-17T15:20:19Z + +**State:** MERGED + +**Merged:** 2025-09-17T23:22:08Z + +**Changes:** +7 -2 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Following https://github.com/opensearch-project/sql/pull/4306#discussion_r2354100847: want to playtest this before merging since it might have weird staging behavior. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @ps48 - DISMISSED + + +Feels great to see this addition 🥇 😄 . I have invested good numbers of hours waiting for the CI just to see it fail on Spotless where rest of all integ, unit tests have passed. 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Swiddis + + +upd: turns out that precommit stashes unstaged patches before running, so don't need to worry about staging at all when we get to the exec. + + +### @ps48 + + +@Swiddis Should we have both `apply` first and then `check` later? What if spotlessApply isn't able to fix all issues, something like an `import missing` + + +### @Swiddis + + +> I have invested good numbers of hours waiting for the CI just to see it fail on Spotless where rest of all integ, unit tests have passed. + +I think we all have -- hits most when you remember to check before the original push, but then forget to re-run it after applying PR feedback that doesn't change functionality + + +### @Swiddis + + +> @Swiddis Should we have both apply first and then check later? What if spotlessApply isn't able to fix all issues, something like an import missing + +I looked around and I don't see any rules we're using that can't be autoapplied, but we can future-proof it + + + +--- + +# PR #4319: [Backport 2.19-dev] Search Command Revamp + +**URL:** https://github.com/opensearch-project/sql/pull/4319 + +**Author:** @vamsimanohar + +**Created:** 2025-09-17T07:57:24Z + +**State:** MERGED + +**Merged:** 2025-09-17T18:47:44Z + +**Changes:** +3786 -303 (71 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4315: [Backport 2.19-dev]Push down project operator with non-identity projections into scan (#4279) + +**URL:** https://github.com/opensearch-project/sql/pull/4315 + +**Author:** @qianheng-aws + +**Created:** 2025-09-17T06:23:14Z + +**State:** MERGED + +**Merged:** 2025-09-17T07:17:13Z + +**Changes:** +189 -68 (30 files) + + +## Description + +* Support project push down after aggregation + + + +* Push down project operator with non-identity projections into scan + + + +* Fix IT + + + +* Also changing plan from merging main + + + +* Fix IT + + + +* Fix 4296 + + + +--------- + + + +(cherry picked from #4279 commit 0e5802b5e8c2022eb05bb65ab13aa76d3f10be81) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4313: [Backport 2.19-dev] Add `values` stats function with UDAF (#4276) + +**URL:** https://github.com/opensearch-project/sql/pull/4313 + +**Author:** @ps48 + +**Created:** 2025-09-17T02:21:26Z + +**State:** MERGED + +**Merged:** 2025-09-17T03:30:50Z + +**Changes:** +648 -24 (20 files) + + +## Description + +Original PR: https://github.com/opensearch-project/sql/pull/4276 +* Add stats function + + + +* add settings for max values + + + +* update functiontypetest IT + + + +* update documentation for values settings + + + +* update the rst docs, remove settingsholder + + + +* update AST additions + + + +* updated the IT testValuesFunctionGroupBy + + +--------- + + +(cherry picked from commit 64a8671bdd0e4cbb15ef038594bf1591098cbdf3) + + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4311: [Backport 2.19-dev] Add non-numeric field support for max/min functions (#4281) + +**URL:** https://github.com/opensearch-project/sql/pull/4311 + +**Author:** @ritvibhatt + +**Created:** 2025-09-17T01:47:44Z + +**State:** MERGED + +**Merged:** 2025-09-17T03:30:29Z + +**Changes:** +210 -8 (9 files) + + +## Description + +(cherry picked from commit 7088e0815131b41f3a350587d6568e623fadb276) + +fix switch for Java 11 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4309: Merge from main + +**URL:** https://github.com/opensearch-project/sql/pull/4309 + +**Author:** @lezzago + +**Created:** 2025-09-16T21:15:56Z + +**State:** MERGED + +**Merged:** 2025-09-16T21:47:35Z + +**Changes:** +18903 -4353 (270 files) + + +## Description + +### Description +Merge from main to stay consistent with the latest main changes + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4306: Add spotless precommit hook + license check + +**URL:** https://github.com/opensearch-project/sql/pull/4306 + +**Author:** @Swiddis + +**Created:** 2025-09-16T20:23:06Z + +**State:** MERGED + +**Merged:** 2025-09-17T15:05:49Z + +**Changes:** +268 -431 (115 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +A few fixes for spotless: +- Don't specify the version for all the sub-projects, have everything depend on the root version (you can't mix plugin versions in one classpath anyway, so this just makes it easier to update to newer spotless versions if a cool feature comes out down the road) +- Update spotless to the latest version (just has some newer settings & general perf improvements) +- Enable all the license header checks (skipping files with mixed licenses) +- Apply all the license header checks +- Add spotless pre-commit hook (for those who use pre-commit) -- avoids being surprised by it in actions runs + +The license check makes all the license headers across the files consistent -- I added exemption rules for specific files (those with mixed licenses), but otherwise all the files had their headers stabilized. There isn't a general option to preserve whatever headers are present and only check for missing ones, so I think this is fine. + +### Related Issues +Resolves #1893 +Resolves #883 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +todo: figure out a config to preserve these bits + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +done, added an ignore rule if the content matches "(original license below)" (rule assumes this means it's a mixed-license file) + + +### @ps48 on `.pre-commit-config.yaml:8` + + +can we apply spotless first and then check? Just a suggestion. If the fixes can be automated might as well automate them in pre-commit hook. + + +### @Swiddis on `.pre-commit-config.yaml:8` + + +Good idea, I'll followup (#4320) + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4305: Updating coalesce documentation + +**URL:** https://github.com/opensearch-project/sql/pull/4305 + +**Author:** @aalva500-prog + +**Created:** 2025-09-16T16:54:18Z + +**State:** MERGED + +**Merged:** 2025-09-22T17:32:45Z + +**Changes:** +60 -26 (2 files) + +**Labels:** `documentation` + + +## Description + +### Description + +With this PR, I would like to address the comments left here (https://github.com/opensearch-project/sql/issues/4199). Where there are some recommendations to enhance the `coalesce` documentation with a new section covering pre-3.1 alternatives. This would help users on older OpenSearch versions achieve `COALESCE-like` functionality using nested `IFNULL` statements, particularly for observability use cases where HTTP status code field names may vary. Also, I'm proposing to add a section for performance and limitations in the newly added `coalesce` function. + +#### Current COALESCE Documentation Status #### + +✅ **Well Covered - OpenSearch 3.1+ COALESCE function:** + +The documentation already includes comprehensive coverage of the native COALESCE function: + +1. 
Syntax and usage - ✅ Documented with clear usage pattern +2. Common use cases and examples - ✅ Multiple detailed examples including: + - Basic field fallback scenarios + - Empty string handling + - Mixed data types with auto coercion + - Non-existent field handling +3. Version specification - ✅ Clearly marked as "Version: 3.1.0" +4. Behavior details - ✅ Comprehensive behavior section covering: + - Null and missing value handling + - Empty string treatment + - Type coercion rules + - Return type determination + +❌ **Missing - Pre-3.1 COALESCE alternative patterns:** +The documentation is missing the requested section for pre-3.1 OpenSearch versions. Specifically missing: + +1. No documentation for nested IFNULL patterns as COALESCE alternatives +2. No examples of the pattern: `ifnull(cast(attributes.http.response.status_code as int), ifnull(cast(attributes.http.status_code as int), -1))` +3. No version guidance for users on older OpenSearch versions +4. No observability use case examples for varying field names + +❌ **Missing - Performance considerations:** +The current documentation doesn't include performance considerations for the COALESCE function. + +❌ **Missing - Limitations or restrictions:** +While the documentation covers behavior well, it doesn't explicitly call out limitations or restrictions. +### Related Issues +Resolves #[4199] + +### Check List +- [] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `docs/user/ppl/functions/condition.rst:None` + + +We should use `os>` instead of `PPL>` to enable doctest. + + +### @ykmr1224 on `docs/user/ppl/functions/condition.rst:None` + + +`Pre-3.1` sounds vague to me. `(Version <= 3.1)` or `(Only 3.0/3.1)` might be better notation? (I was not sure if it applies to 3.1 or not) + + +### @aalva500-prog on `docs/user/ppl/functions/condition.rst:None` + + +Seems like this file is using `PPL>` in a lot of commands. I'll fix all of them. + + +### @aalva500-prog on `docs/user/ppl/functions/condition.rst:None` + + +The nested `IFNULL` pattern should work on all versions, whereas `coalesce` only works on the newest versions. Maybe we can keep it as follows to avoid confusion: + +``` +Alternative: Nested IFNULL Pattern +>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + +For OpenSearch versions prior to 3.1, COALESCE-like functionality can be achieved using nested IFNULL statements. This pattern is particularly useful in observability use cases where field names may vary across different data sources. + +Usage: ifnull(field1, ifnull(field2, ifnull(field3, default_value))) +``` + + + +### @penghuo on `docs/user/ppl/functions/condition.rst:None` + + +Move to IFNULL section, as another example to explain how it works. + + +### @aalva500-prog on `docs/user/ppl/functions/condition.rst:None` + + +Sure, I'll do that, thanks! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4302: [Backport 2.19-dev] strftime function implementation + +**URL:** https://github.com/opensearch-project/sql/pull/4302 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-15T23:58:37Z + +**State:** MERGED + +**Merged:** 2025-09-16T04:39:33Z + +**Changes:** +1521 -19 (17 files) + + +## Description + +Backport 0af7429b2679279fe517e9d8c2a604e94f8cb27b from #4106. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4301: Update grammar files and developer guide + +**URL:** https://github.com/opensearch-project/sql/pull/4301 + +**Author:** @ahkcs + +**Created:** 2025-09-15T22:56:52Z + +**State:** MERGED + +**Merged:** 2025-09-17T23:34:32Z + +**Changes:** +743 -387 (3 files) + +**Labels:** `maintenance` + + +## Description + +### Description + Update language-grammar package with latest grammar files and documentation + + Summary + + - Updated all ANTLR grammar files in the language-grammar package with the latest versions from their respective source modules + - Added documentation to DEVELOPER_GUIDE.rst explaining the purpose and workflow of the language-grammar package + + Documentation Updates + + Added a new "Language Grammar Package" section to DEVELOPER_GUIDE.rst + + Purpose + + The language-grammar package serves as a centralized repository for all ANTLR grammar files, enabling consistent sharing between the main SQL repository and Spark repository while reducing duplication. This update ensures the package contains the latest grammar definitions. + + + Next Steps + + Once merged, this will trigger CI to automatically publish the updated grammar files to Maven Central for consumption by dependent projects. + + + +## Reviews + + +### @RyanL1997 - COMMENTED + + +@ahkcs , thanks for the change. I just left some comments. ~~Also, I would like to ask a high level first: how did you determine what should be moved or not? Did we regenerated these `.g4` file?~~ + +NVM. I just discovered that the change was in `language-grammar` so it should be align with the actual `.g4` file we did for all the new command. So the change is LGTM. + + +## Review Comments + + +### @ykmr1224 on `DEVELOPER_GUIDE.rst:None` + + +It looks weird to have this amount of explanation here. 
Should we make separate section below and link from here, or might want to add README under language-grammar directory. + +btw, what is the long term plan for it? it seems adding another duplicate grammar files... + + +### @ahkcs on `DEVELOPER_GUIDE.rst:None` + + +I have updated the doc. I kept it in developer_guide since this is an action item for the developers when they update the grammar files. I placed the details in a new section + +For the long term plan, we are planning to use this package as the centralized place to keep grammar files from SQL and Spark repo, and upload them to maven repo as dependency files. + + + + +### @RyanL1997 on `language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4:546` + + +just for my knowledge: how is the above comment related to this line here? + + +### @RyanL1997 on `language-grammar/src/main/antlr4/OpenSearchPPLParser.g4:327` + + +why removing all these? + + +## General Comments + + +### @ahkcs + + +> @ahkcs , thanks for the change. I just left some comments. Also, I would like to ask a high level first: how did you determine what should be moved or not? Did we regenerated these `.g4` file? + +The change is caused by updating our grammar files to the latest + + +--- + +# PR #4300: Add support for writing resources and include more tests + +**URL:** https://github.com/opensearch-project/sql/pull/4300 + +**Author:** @lezzago + +**Created:** 2025-09-15T22:00:02Z + +**State:** MERGED + +**Merged:** 2025-09-23T19:52:57Z + +**Changes:** +3467 -57 (52 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Add support for writing resources and include more tests. +Fixed many experimental tag placements. + +### Related Issues +Resolves N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @vamsimanohar - APPROVED + + +LGTM. I think we still need to think through on what to use for permissions. + +`cluster:admin/opensearch/direct_query/write/resources` + + +## Review Comments + + +### @joshuali925 on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/WriteDirectQueryResourcesRequest.java:23` + + +would the values ever need to be non-string? + + +### @joshuali925 on `direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClient.java:109` + + +return annotation doesn't match return type, is it JSONArray as string? + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/rest/RestDirectQueryResourcesManagementAction.java:95` + + +I thought we are going to have POST for both write and read resources? + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/format/DirectQueryRequestConverter.java:None` + + +nit. duplicated annotation? + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/format/DirectQueryResourcesRequestConverter.java:None` + + +nit. duplicated annotation? + + +### @joshuali925 on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/GetDirectQueryResourcesActionRequest.java:28` + + +curious why remove this? + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/WriteDirectQueryResourcesRequest.java:23` + + +These are the request options so String should be safe here. The datasource that takes in this request to deserialize it to a non-string like an int if needed. 
+ + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClient.java:109` + + +Good catch, will update the annotation to be of String. + + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/rest/RestDirectQueryResourcesManagementAction.java:95` + + +This one single route was only added and it was a create alert manager resources, so it had to be POST. I think that since this is experimental, lets keep GET for now in read resources and add POST as a followup if needed. + + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/format/DirectQueryRequestConverter.java:None` + + +removing + + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/format/DirectQueryResourcesRequestConverter.java:None` + + +removing + + +### @lezzago on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/GetDirectQueryResourcesActionRequest.java:28` + + +I was having test failures because this would require the input stream to have all the required params for the parent classes like needed a field like nodeId. Below is an example failure from the test `testStreamConstructorWithAllFields` in `GetDirectQueryResourcesActionRequestTest`. 
+``` +> Task :direct-query:test FAILED +GetDirectQueryResourcesActionRequestTest > testStreamConstructorWithAllFields() FAILED + java.lang.NullPointerException: Cannot invoke "String.isEmpty()" because "nodeId" is null + at org.opensearch.core.tasks.TaskId.readFromStream(TaskId.java:99) + at org.opensearch.transport.TransportRequest.(TransportRequest.java:74) + at org.opensearch.action.ActionRequest.(ActionRequest.java:58) + at org.opensearch.sql.directquery.transport.model.GetDirectQueryResourcesActionRequest.(GetDirectQueryResourcesActionRequest.java:29) + at org.opensearch.sql.directquery.transport.model.GetDirectQueryResourcesActionRequestTest.testStreamConstructorWithAllFields(GetDirectQueryResourcesActionRequestTest.java:54) + +``` + + +### @vamsimanohar on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/WriteDirectQueryResourcesRequest.java:22` + + +is this requestBody? + + +### @vamsimanohar on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/WriteDirectQueryResourcesRequest.java:20` + + +Is this class form earlier PRs? Is this going to be a list of all resource types from all datasource types? + + +### @vamsimanohar on `direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClientImpl.java:None` + + +is this fully qualified name necessary? +weirdly llms are generating fully qualified names I have faced this issue earlier. + + +### @vamsimanohar on `direct-query-core/src/main/java/org/opensearch/sql/prometheus/query/PrometheusQueryHandler.java:None` + + +Nit same as above. + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClientImpl.java:None` + + +Ack, it was from the existing code that I copied, but good catch. I will see if I can clean any of this up anywhere else in the modules. 
+ + +### @vamsimanohar on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/TransportWriteDirectQueryResourcesRequestAction.java:None` + + +Is `indices:data` correct namespace? What is the rationale behind this? + +` public static final String NAME = "cluster:admin/opensearch/ql/datasources/read";` + +For example we are using the above for GetDatasourcesQueryRequest. The rationale here is that datasources are cluster level resource which admin can configure. + + + + +### @vamsimanohar on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/TransportWriteDirectQueryResourcesRequestAction.java:None` + + +[NIT] Also, we need to create an issue for making changes in security dashboards plugin to include all the new permissions. + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/prometheus/query/PrometheusQueryHandler.java:None` + + +done + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/WriteDirectQueryResourcesRequest.java:20` + + +This class is from earlier PRs. Currently this is the list of all resource types. When this goes to a new plugin, there needs to be some re-architecturing to fetch this automatically for each datasource supported. + + +### @lezzago on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/WriteDirectQueryResourcesRequest.java:22` + + +Yes, we pass through the request body. + + +## General Comments + + +### @lezzago + + +> LGTM. I think we still need to think through on what to use for permissions. +> +> `cluster:admin/opensearch/direct_query/write/resources` + +Is this for more granular level permissions on what data sources the users would have access to? But I agree before going GA with the feature, we need to have this get more feedback from the community as well. 
+ + +--- + +# PR #4298: [Backport 2.19-dev] `mvjoin` support in PPL Caclite + +**URL:** https://github.com/opensearch-project/sql/pull/4298 + +**Author:** @ps48 + +**Created:** 2025-09-15T20:42:49Z + +**State:** MERGED + +**Merged:** 2025-09-15T22:05:02Z + +**Changes:** +328 -1 (12 files) + + +## Description + +### Description +This PR adds support for the mvjoin function in PPL with Apache Calcite integration. The mvjoin function concatenates the elements of a multi-value field into a single string using a specified delimiter. This is useful for converting multi-value fields into a readable string format. + +Syntax: `mvjoin(multivalue_field, delimiter)` + +Example: +``` +source=logs | eval joined_values = mvjoin(array('apple', 'banana', 'cherry'), ', ') +// Result: "apple, banana, cherry" +``` + +Original PR to main: https://github.com/opensearch-project/sql/pull/4217 + + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4297: Support `Streamstats` command with calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4297 + +**Author:** @ishaoxy + +**Created:** 2025-09-15T11:53:08Z + +**State:** MERGED + +**Merged:** 2025-11-04T02:53:04Z + +**Changes:** +2504 -26 (34 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Support `Streamstats` command with arguments below: + +``` +streamstats + [current=] + [window=] + [global=] + [reset_before="("")"] + [reset_after="("")"] + stats-agg-term + [by-clause] +``` +Also rule out aggregator in PPLAggregateMergeRule. + +## Implementation Details + +The implementation handles three distinct execution paths, depending on the combination of `window`, `global`, `group`, and `reset` arguments: + +**Why This Design** + +1. Default path can rely on native SQL OVER because there is no global/window-with-reset complexity. + +2. **Specific SQL limitations:** + +- Native SQL OVER clauses cannot implement **per-group sliding windows over the entire stream** . However, we want to combine a global sequence with group-level partitioning. In SQL, a window is either **global** without a BY clause or **partitioned by a group** with a BY clause; you cannot have a “global sequence plus per-group sliding frame” in one OVER. + +- `ROWS BETWEEN ... PRECEDING` cannot take a variable (it only supports constants like `1 PRECEDING`, `1+1 PRECEDING`). + +3. `Global + window + group` want "per-group sliding windows over entire stream," but SQL window functions do not allow fully flexible frame boundaries combined with lateral joins. Hence, we simulate it via `ROW_NUMBER() + correlated join + aggregate`. + +4. 
Reset path introduces segment semantics (`seg_id`) that cannot be represented natively in SQL OVER clauses. Each reset creates a new frame partition. By default, reset behaves like a global window, but when grouping exists, it applies per-group aggregation within each reset segment. So I use helper columns (before_flag, after_flag, seg_id) and a correlated join ensures correctness. + +**1. Default Path (No global in use / no reset)** + +- Window functions are translated directly using visitWindowFunction(). +- Calcite OVER clauses are generated for each aggregate. +- SQL-like plan: + +``` +SELECT *, +FROM source +``` + +**2. global=true + window > 0 + group exists** + +To support sliding windows over the entire stream with optional grouping: + +- A global sequence column (ROW_NUMBER() OVER (ORDER BY ...) AS seq) is added. +- Correlated LEFT JOINs simulate the sliding window using seq and by-clause equality filters. +- Each window function is converted into a standard aggregate (AggregateFunction) and executed within the correlated subquery. +- SQL-like plan: + +``` +WITH t AS ( + SELECT x.*, + ROW_NUMBER() OVER (ORDER BY /* default ordering */) AS seq + FROM source x +) +SELECT t.*, agg.* +FROM t +LEFT JOIN LATERAL ( + SELECT SUM(age) AS sum_age + FROM t r + WHERE r.seq BETWEEN t.seq - (:window - 1) AND t.seq + AND r.gender IS NOT DISTINCT FROM t.gender +) AS agg ON TRUE; +``` + +**3. Reset Path (reset_before / reset_after defined)** + +When `reset_before` or `reset_after` exist: + +- Helper columns are added: + +1. __stream_seq__: global row number. +2. __reset_before_flag__ / __reset_after_flag__: flags for reset conditions. +3. __seg_id__: segment ID, computed via SUM over flags to identify partitions. + +- Correlated LEFT JOIN + aggregate simulates the frame while respecting segment boundaries (seg_id) and optional group filtering. 
+- SQL-like plan: + +``` +WITH base AS ( + SELECT s.*, + ROW_NUMBER() OVER (ORDER BY /* default */) AS seq, + CASE WHEN (/reset_before predicate/) THEN 1 ELSE 0 END AS before_flag, + CASE WHEN (/reset_after predicate/) THEN 1 ELSE 0 END AS after_flag + FROM source s +), +seg AS ( + SELECT b.*, + COALESCE(SUM(before_flag) OVER (ORDER BY seq ROWS UNBOUNDED PRECEDING), 0) + + COALESCE(SUM(after_flag) OVER (ORDER BY seq ROWS UNBOUNDED PRECEDING + RANGE BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) + AS seg_id + FROM base b +) +SELECT t.*, agg.* +FROM seg t +LEFT JOIN LATERAL ( + SELECT /* window aggregates: SUM(age), AVG(salary), ... */ + FROM seg r + WHERE ( + CASE + WHEN :window = 0 AND :current THEN r.seq <= t.seq + WHEN :window = 0 AND NOT :current THEN r.seq < t.seq + WHEN :current THEN r.seq BETWEEN t.seq - (:window - 1) AND t.seq + ELSE r.seq BETWEEN t.seq - :window AND t.seq - 1 + END + ) + AND r.seg_id = t.seg_id + AND (r.gender IS NOT DISTINCT FROM t.gender) -- optional by-clause +) AS agg ON TRUE; +``` + + +### Related Issues +Resolves #4207 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - COMMENTED + + +Looks good to me. Thanks for your contribution. I left some comments for minor suggestions. + +Also, please resolve the conflicts + + +### @LantaoJin - COMMENTED + + +haven't review the code details, just need to refactor the user doc and confirm the behaviour + + +### @LantaoJin - COMMENTED + + +Please add some test` in `CalciteExplainIT.java` + + +### @yuancu - COMMENTED + + +Reviewing in progress, CalciteRelNodeVisitor is not reviewed yet. 
+ + +### @yuancu - DISMISSED + + +LGTM + + +### @LantaoJin - COMMENTED + + +Basically looks good, please fix the IT. + + +## Review Comments + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Can be replaced with `WindowFrame.toCurrentRow()` for robustness + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/tree/StreamWindow.java:19` + + +How's this class different from the `Window` class? It seems they are identical apart from `argExprList`, which stores the options / arguments of `streamstats` command. But the argExprList is never used (or will they be used in the future?). + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Can you also add tests for these erroneous cases? + + +### @ishaoxy on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Replaced, thanks for reminding. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/ast/tree/StreamWindow.java:19` + + +Now refactored. + + +### @ishaoxy on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +Added an IT case for negative window size. + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +The number of `=` should equal the length of text + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +ditto + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +`|` at the beginning of line means starting a new line. So no need to add empty new line if you add `|` at the beginning of line + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:26` + + +Add `|` in L24 or insert an empty new line before L24. Delete `|` in L23 or delete L22. + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +can be deleted + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +delete empty line between two bullet statements. 
+ + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +Combine lines 57~59 into +``` + * current: optional. If true, the search includes the given, or current, event in the summary calculations. If false, the search uses the field value from the previous event. **Default:** true. +``` + + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +ditto for rest + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +> Used only when the window argument is set. + +This line confused me. As the default value of `global` is `true`, and default value of `window` is `0`. What if the `window` and `global` all not set? Or what if we set global=true and window=0? + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +And what if we set `global=false` and `window=4` but no `by` clause? + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +refactor L96~L366 according to https://github.com/opensearch-project/sql/pull/4562/files#diff-75c70b8c047f6bd1daaea269784eeacdd4c6bba49cb4a34dd3e84a0d8aa6cffd + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +Update all `PPL>` to `os>` + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +delete this section + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +seems the result ordered by country +``` +source=state_country | sort country | streamstats window=2 global=true avg(age) as running_avg by country +``` + + +### @LantaoJin on `docs/user/ppl/cmd/streamstats.rst:None` + + +ditto + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:25` + + +remove this line + + +### @LantaoJin on `docs/user/ppl/index.rst:115` + + +please add streamstats.rst to category.json as well + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:118` + + +these three seems missing in `keywordsCanBeId` + + +### @LantaoJin on 
`integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:18` + + +add some tests with other command +eventstats + streamstats: +``` +source = t | eventstats ... | streamstats ... +``` +sort + streamstats: +``` +source = t | sort ... | streamstats ... +``` +left join with streamstats: +``` +source = t | left join left=l right=r on l.key=r.key [ source = tt | streamstats ... ] +``` +streamstats in in-subsearch: +``` +source = t | where a in [ source = tt | streamstats ...] +``` + + +### @ishaoxy on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java:18` + + +fixed all the doc issues and added all the tests mentioned above. @LantaoJin + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:17` + + +why enable this test only for pushdown? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:32` + + +ditto + + +### @ishaoxy on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:17` + + +I previously imitated the behavior of `testEventstatsDistinctCountExplain()`, and I'm not sure why it was set to only pushdown. Now fixed. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:806` + + +Can you add a few more complex explain ITs (e.g. covering all 3 plan paths)? + + +## General Comments + + +### @qianheng-aws + + +[question] +2. 
global=true + window > 0 + group exists +Is +``` +WITH t AS ( + SELECT x.*, + ROW_NUMBER() OVER (ORDER BY /* default ordering */) AS seq + FROM source x +) +SELECT t.*, agg.* +FROM t +LEFT JOIN LATERAL ( + SELECT SUM(age) AS sum_age + FROM t r + WHERE r.seq BETWEEN t.seq - (:window - 1) AND t.seq + AND r.gender IS NOT DISTINCT FROM t.gender +) AS agg ON TRUE; +``` +equal to +``` +SELECT + t.*, + SUM(age) OVER ( + PARTITION BY gender + ORDER BY seq + ROWS BETWEEN (:window - 1) PRECEDING AND CURRENT ROW + ) as sum_age +FROM ( + SELECT x.*, + ROW_NUMBER() OVER (ORDER BY /* default ordering */) AS seq + FROM source x +) t; +``` +? + +3. Reset Path (reset_before / reset_after defined) +I don't get the implementation presented by the SQL in our description. Seems it implements the reset_after and reset_before of accumulation by splitting all rows into several groups based on `seg_id`. But is it really correct and do we have results comparison with SPL? + + +### @qianheng-aws + + +Do we have explain test cases for feature 2(i.e. global) and 3(i.e. reset_before, reset_after)? + + +### @ishaoxy + + +> SELECT +> t.*, +> SUM(age) OVER ( +> PARTITION BY gender +> ORDER BY seq +> ROWS BETWEEN (:window - 1) PRECEDING AND CURRENT ROW +> ) as sum_age +> FROM ( +> SELECT x.*, +> ROW_NUMBER() OVER (ORDER BY /* default ordering */) AS seq +> FROM source x +> ) t; + +I think the second case is the same as the default solution window=n, global=false+by, but it cannot achieve the case of window=n, global=true+by, which is why I used the former expression. + +And for Reset path, I have done a lot of experiments on SPL, and I think our current implementation is consistent with the behavior of it. @qianheng-aws + + +### @ishaoxy + + +The "global" path behavior is influenced by pr #4703 and resulted in an incorrect performance. Thanks to @yuancu 's help, now aggregator is excluded in PPLAggregateMergeRule in this pr. 
+ + +--- + +# PR #4293: Fix doctest branch (2.19) + +**URL:** https://github.com/opensearch-project/sql/pull/4293 + +**Author:** @Swiddis + +**Created:** 2025-09-12T22:07:08Z + +**State:** MERGED + +**Merged:** 2025-09-12T22:51:24Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Same as #4292 + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4292: Fix doctest branch + +**URL:** https://github.com/opensearch-project/sql/pull/4292 + +**Author:** @Swiddis + +**Created:** 2025-09-12T22:06:37Z + +**State:** MERGED + +**Merged:** 2025-09-12T22:51:38Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Forgot to apply this fix to the checkout in #4219, credit to @vamsimanohar for the fix + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4286: [Backport 2.19-dev] Doctest: Use 1.0 branch instead of main + +**URL:** https://github.com/opensearch-project/sql/pull/4286 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-12T02:44:40Z + +**State:** MERGED + +**Merged:** 2025-09-12T18:23:24Z + +**Changes:** +2 -2 (1 files) + + +## Description + +Backport 04a063359f4c07458b633b55441be936b9ee86b6 from #4219. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4285: [Backport 2.19-dev] Speed up aggregation pushdown for single group-by expression (#3550) + +**URL:** https://github.com/opensearch-project/sql/pull/4285 + +**Author:** @LantaoJin + +**Created:** 2025-09-11T23:36:11Z + +**State:** MERGED + +**Merged:** 2025-09-12T17:09:18Z + +**Changes:** +1794 -567 (65 files) + + +## Description + +(cherry picked #3550 from commit 0e2dc7cc2e1b291abcf13de9d34b1de97c0f51bf) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4284: [Backport 2.19-dev] Add earliest/latest aggregate function for eventstats PPL command #4212 + +**URL:** https://github.com/opensearch-project/sql/pull/4284 + +**Author:** @ykmr1224 + +**Created:** 2025-09-11T23:09:03Z + +**State:** MERGED + +**Merged:** 2025-09-15T22:11:06Z + +**Changes:** +1360 -610 (32 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/7de854542dd16ec9695162533673db6710cbde74 from #4212 to 2.19-dev branch + +Manual backport due to Java 11 compatibility. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4283: [Backport 2.19-dev] Introduce YAML formatter for better testing/debugging + +**URL:** https://github.com/opensearch-project/sql/pull/4283 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-11T22:47:34Z + +**State:** MERGED + +**Merged:** 2025-09-11T23:27:02Z + +**Changes:** +297 -14 (11 files) + + +## Description + +Backport 6d694402aa7a7ddc61bdd6756da6f3df9fe8158f from #4274. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4281: Add non-numeric field support for max/min functions + +**URL:** https://github.com/opensearch-project/sql/pull/4281 + +**Author:** @ritvibhatt + +**Created:** 2025-09-11T19:24:32Z + +**State:** MERGED + +**Merged:** 2025-09-17T00:20:45Z + +**Changes:** +210 -6 (9 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Add support for non-numeric and mixed field types for max/min aggregate functions. Non-numeric fields are sorted lexicographically. + +- Updated AggregateAnalyzer to use TopHits with size=1 for pushdown when field type is text/keyword +- Added MaxMinParser for handling TopHits results from min/max on text fields, converting all + values to strings for consistent lexicographical ordering for mixed field types + +### Related Issues +Resolves #4050 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +### @ykmr1224 - APPROVED + + +LGTM other than minor refactoring comment. It is fine to address those comment in a separate PR. + + +## Review Comments + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java:None` + + +Could you check if we can reuse the existing `TopHitsParser`? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +1. Is current approach working for any non-numerical field or any string field? Just thinking if we can extend this support to more field type in future, such as min/max(ip) etc. +2. I recall there is some convert text to keyword logic, do we support min/max(text field) already with current changes? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +The only difference between this and the code for MAX is in sort order? + + +### @dai-chen on `integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json:None` + + +This looks like V2 explain output? Could you double check? + + +### @ritvibhatt on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java:None` + + +Yes ```TopHitsParser``` works with single result thanks, updated + + +### @ritvibhatt on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +1. Updated to include ip address type, date types are already supported +2. 
Yup both text and keyword are supported with current changes + + +### @ritvibhatt on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +Yes + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +I'm thinking should we do the opposite. Because OpenSearch doc says min/max aggregation can only support numerical fields, but it doesn't mention what field type is supported / not supported by top hits aggregation. + +Could you double check what other common field types min/max aggregation can support besides numerics, e.g., date, IP, object etc? If that's the case, probably we should check numeric type instead and do top hits aggregation for any non-numerical types? + + +### @ritvibhatt on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +That makes sense, double checked and it looks like max/min aggregation only supports numeric and date types, so updated to check for those otherwise do top hits aggregation + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:615` + + +nit: we can use assertYamlEqualsJsonIgnoreId (refer https://github.com/opensearch-project/sql/pull/4274) Calcite plan readability. + + +### @ykmr1224 on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:303` + + +MIN and MAX logic seems same other than AggregationBuilders.min/max and ASC/DESC. Can we extract as a method for maintainability? 
+ + +### @ritvibhatt on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:303` + + +Yup makes sense will add this and using ```assertYamlEqualsJsonIgnoreId``` in a follow up pr + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4279: Push down project operator with non-identity projections into scan + +**URL:** https://github.com/opensearch-project/sql/pull/4279 + +**Author:** @qianheng-aws + +**Created:** 2025-09-11T17:44:06Z + +**State:** MERGED + +**Merged:** 2025-09-17T03:16:09Z + +**Changes:** +189 -69 (30 files) + +**Labels:** `bug`, `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +This PR aims to push down or eliminate the project upon the scan with aggregate pushed down. + +Since we will always append a project operator after aggregation for adjusting the order of its output columns, this optimization will enhance this common case to have only one `scan` operator, then it is able to leverage our https://github.com/opensearch-project/sql/pull/3853 to skip codegen. + +e.g. +PPL: +``` +PPL: +source=opensearch-sql_test_index_bank | stats count() by span(birthdate,1M) +``` +Before this PR: +``` +FINAL PLAN: +EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1M)=[$t0]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->... +``` + +With this PR: +``` +FINAL PLAN: +CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->..., Project->[count(), span(birthdate,1M)]] ...] +``` + +### Related Issues +Resolves +https://github.com/opensearch-project/sql/issues/4230 +https://github.com/opensearch-project/sql/issues/4296 +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json:4` + + +Is this the side effect of this optimization? +before: +`EnumerableCalc + CalciteEnumerableIndexScan + 1 script` +after: +`CalciteEnumerableIndexScan + 3 scripts` + +Would be a new regression issue? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json:4` + + +Seems it will be regression. The VolcanoPlanner chooses this new plan because the optimizer thinks its cost is smaller than before under the current cost computing mechanism. + +The implementation of this `OpenSearchProjectIndexScanRule` may be fine. The root cause is our out-of-date cost-computing mechanism. Its current logic no longer suits our many different push down cases. It will be addressed here: https://github.com/opensearch-project/sql/issues/4312 + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4277: [Backport 2.19-dev] [Feature] Implementation of mode `sed` and `offset_field` in rex PPL command + +**URL:** https://github.com/opensearch-project/sql/pull/4277 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-11T16:14:49Z + +**State:** MERGED + +**Merged:** 2025-09-11T22:49:04Z + +**Changes:** +507 -12 (15 files) + + +## Description + +Backport ab02d56a9cbabfe8ec95957dec64d481438ce043 from #4241.
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4276: Add `values` stats function with UDAF + +**URL:** https://github.com/opensearch-project/sql/pull/4276 + +**Author:** @ps48 + +**Created:** 2025-09-11T16:05:53Z + +**State:** MERGED + +**Merged:** 2025-09-17T01:57:12Z + +**Changes:** +646 -24 (20 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +This PR enhances the values() aggregation function in PPL (Piped Processing Language) for the Calcite engine by making the maximum value limit configurable. The values() function collects all unique values from a field into a sorted array, providing a de-duplicated and lexicographically ordered view of the data. + + - Unique Values Only: Automatically deduplicates values using TreeSet + - Sorted Output: Returns values in lexicographical order (as strings) + - Configurable Limit: Users can set plugins.ppl.values.max.limit to control max values (0 = unlimited) + - Null Filtering: Automatically filters out null values + - Type Support: Supports all scalar data types (numeric, string, boolean, date/time, IP, binary) + + Usage Example +``` + source=accounts | stats values(firstname) + // Returns: ["Amber","Dale","Hattie","Nanette"] (sorted, unique) +``` + +### Related Issues +Resolves #4026 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/stats.rst:595` + + +Reformat rst doc. https://github.com/ps48/sql/blob/84241363f679191e3b04ca7959cfc220269e62c1/docs/user/ppl/cmd/stats.rst#values + + +### @penghuo on `docs/user/ppl/cmd/stats.rst:None` + + +duplicate with line 590. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +Avoid fetch from setting, follow TakeAggFunction.class impl + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +Values (array/object/... ) can not been translate to string should be ignored, instead of throw exception + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +there is null check on line 52, but convertToString(value); can also return null, which one is correct? + + +### @ps48 on `docs/user/ppl/cmd/stats.rst:595` + + +Updated here: https://github.com/opensearch-project/sql/pull/4276/commits/50a4bf8948a4194b3766aafb312ff729e93e70e3 + + +### @ps48 on `docs/user/ppl/cmd/stats.rst:None` + + +yes, removed it here https://github.com/opensearch-project/sql/pull/4276/commits/50a4bf8948a4194b3766aafb312ff729e93e70e3 + + +### @ps48 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +updated with AST implementation: https://github.com/opensearch-project/sql/pull/4276/commits/cb5dcdf4d2390b6125dba5d3c0e3987cb5b4454a + + +### @ps48 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +These should be automatically ignored in the function typechecker + + +### @ps48 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +Simplified this to ` String stringValue = String.valueOf(value);` + + +### @ps48 on 
`core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +For Array fields and MV fields I see the issue with both `take` and `values` where both return runtime exceptions. + +``` +### mv +POST http://localhost:9200/_plugins/_ppl/ +Content-Type: application/json + +{ + "query": "source=sample-logs | stats values(calc)" +} + +### +POST http://localhost:9200/_plugins/_ppl/ +Content-Type: application/json + +{ + "query": "source=sample-logs | stats take(calc)" +} + +``` + +Output: +``` +HTTP/1.1 500 Internal Server Error +X-OpenSearch-Version: OpenSearch/3.2.0-SNAPSHOT (opensearch) +content-type: text/plain; charset=UTF-8 +content-encoding: gzip +content-length: 248 + +{ + "error": { + "reason": "There was internal problem at backend", + "details": "java.sql.SQLException: exception while executing query: class java.util.ArrayList cannot be cast to class java.lang.Long (java.util.ArrayList and java.lang.Long are in module java.base of loader 'bootstrap')", + "type": "RuntimeException" + }, + "status": 500 +} +``` + + + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +Thanks +Track with scheamless issue https://github.com/opensearch-project/sql/issues/3995 + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +String.valueOf(value); will return "null", is it expected? + + +### @ps48 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +this seems to be the rootcause: https://github.com/opensearch-project/sql/blob/0af7429b2679279fe517e9d8c2a604e94f8cb27b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java#L274C27-L274C49 + + +### @ps48 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ValuesAggFunction.java:None` + + +@penghuo For cases where `value ==> "null"` (string value), we'll include "null" in output. I feel this should be fine. 
As these are not actual null values. These are null strings ingested by the users. + +For cases where `value ==> null` is handled in line 51. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4274: Introduce YAML formatter for better testing/debugging + +**URL:** https://github.com/opensearch-project/sql/pull/4274 + +**Author:** @ykmr1224 + +**Created:** 2025-09-11T15:28:30Z + +**State:** MERGED + +**Merged:** 2025-09-11T22:46:44Z + +**Changes:** +297 -14 (11 files) + +**Labels:** `maintenance`, `backport 2.19-dev` + + +## Description + +(No production code change) + +### Description +- Introduce YAML formatter for better testing/debugging + - We can format Calcite Plan into more readable format. + - Diff would become easier to read than JSON + - Added to `core` package so it can be used for debugging in other packages. +- Formatter is configured to output consistent YAML where attributes are sorted and use minimum quoting. +- Fixed one test case from CalciteExplainIT as an example. + - I can migrate other test cases after this PR is approved and merged. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @Swiddis - DISMISSED + + +This is really nice + +Feature request: support this as an output format via the `_explain` endpoint (`POST ../_explain?format=yml`) so we can have this when doing live debugging + + +## Review Comments + + +### @Swiddis on `core/src/test/java/org/opensearch/sql/utils/YamlFormatterTest.java:None` + + +Test failures on windows because jackson uses `\r\n` on windows by default, you should be able to configure this https://github.com/FasterXML/jackson-databind/issues/585 + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/utils/YamlFormatterTest.java:None` + + +Got it. Thank you for giving reference! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4273: Print links to test logs after integTest + +**URL:** https://github.com/opensearch-project/sql/pull/4273 + +**Author:** @ykmr1224 + +**Created:** 2025-09-10T22:30:02Z + +**State:** MERGED + +**Merged:** 2025-09-11T15:58:47Z + +**Changes:** +10 -1 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +- Print links to test logs after integTest + - It helps coding agent to identify the server side logs + +Sample output +``` +% ./gradlew :integ-test:integTest +... +> Task :integ-test:integTest +Test report available at: file:///Volumes/workplace/sql/integ-test/build/reports/tests/integTest/index.html +integTest cluster logs available at: file:///Volumes/workplace/sql/integ-test/build/testclusters/integTest-0/logs/integTest.log +remoteCluster cluster logs available at: file:///Volumes/workplace/sql/integ-test/build/testclusters/remoteCluster-0/logs/remoteCluster.log +... +``` + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +The test report path is already logged elsewhere iirc, doesn't hurt to log it again w/ the cluster logs though + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +> The test report path is already logged elsewhere iirc, doesn't hurt to log it again w/ the cluster logs though + +It does only when it failed, and hidden in pile of log outputs... + + +--- + +# PR #4271: [Backport 2.19-dev] Push down limit operator into aggregation bucket size (#4228) + +**URL:** https://github.com/opensearch-project/sql/pull/4271 + +**Author:** @qianheng-aws + +**Created:** 2025-09-10T21:53:25Z + +**State:** MERGED + +**Merged:** 2025-09-11T22:50:20Z + +**Changes:** +212 -42 (25 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4270: Fix the IT issue caused by merging conflict + +**URL:** https://github.com/opensearch-project/sql/pull/4270 + +**Author:** @qianheng-aws + +**Created:** 2025-09-10T21:36:53Z + +**State:** MERGED + +**Merged:** 2025-09-10T22:03:28Z + +**Changes:** +6 -1 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +The IT `CalciteExplainIT > testExplainOnFirstLast FAILED` failed on main branch after merging https://github.com/opensearch-project/sql/pull/4228, because of plan having changed. + +### Related Issues + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4269: [Backport 2.19-dev] Support first/last aggregate functions for PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4269 + +**Author:** @ahkcs + +**Created:** 2025-09-10T16:45:13Z + +**State:** MERGED + +**Merged:** 2025-09-10T21:14:38Z + +**Changes:** +867 -9 (15 files) + + +## Description + +(cherry picked from commit e232f6aed42aa972ed44861454ab020178f3a931) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Can you update the latest code base again? 
@ahkcs + + +--- + +# PR #4268: [2.19-dev] Fix CalcitePPLAppendCommandIT failure via new join config + +**URL:** https://github.com/opensearch-project/sql/pull/4268 + +**Author:** @LantaoJin + +**Created:** 2025-09-10T16:19:29Z + +**State:** MERGED + +**Merged:** 2025-09-10T18:42:42Z + +**Changes:** +78 -62 (1 files) + +**Labels:** `testing` + + +## Description + +### Description +#4244 missed a config after #4267 merged. This PR is to fix the IT failure by adding a new config introduced in #4267 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Just curious, this is due to the Join and Append PR merged at the same time? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +> Just curious, this is due to the Join and Append PR merged at the same time? + +Yes. +``` +PR-Append main merged, +PR-Join main merged, +PR-Append 2.19 created, +PR-Join 2.19 created, +PR-Join 2.19 passed & merged, +PR-Append 2.19 passed & merged (should update again), +Logical conflicts occurred. 
+``` + + +--- + +# PR #4267: [Backport 2.19-dev] Support join field list and join options (#3803) + +**URL:** https://github.com/opensearch-project/sql/pull/4267 + +**Author:** @LantaoJin + +**Created:** 2025-09-10T06:10:08Z + +**State:** MERGED + +**Merged:** 2025-09-10T15:24:18Z + +**Changes:** +1330 -265 (30 files) + + +## Description + +(cherry picked from #3803 commit d46cb4cdbb3f56a1485a3aa2d81455efc7d09eb8) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4263: Fix gitignore to ignore symbolic link + +**URL:** https://github.com/opensearch-project/sql/pull/4263 + +**Author:** @ykmr1224 + +**Created:** 2025-09-09T23:27:53Z + +**State:** MERGED + +**Merged:** 2025-09-10T16:26:03Z + +**Changes:** +4 -4 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +- Previous commit was working for directory, but not working for symbolic link to directory. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4258: Add gitignore for Cline + +**URL:** https://github.com/opensearch-project/sql/pull/4258 + +**Author:** @ykmr1224 + +**Created:** 2025-09-09T21:12:21Z + +**State:** MERGED + +**Merged:** 2025-09-09T21:38:56Z + +**Changes:** +4 -2 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +- Add gitignore items for Cline + +### Related Issues +- n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4257: Add Ryan as a maintainer + +**URL:** https://github.com/opensearch-project/sql/pull/4257 + +**Author:** @Swiddis + +**Created:** 2025-09-09T20:39:03Z + +**State:** MERGED + +**Merged:** 2025-09-17T16:39:01Z + +**Changes:** +3 -2 (2 files) + +**Labels:** `maintenance`, `backport-failed`, `backport 2.19` + + +## Description + +### Description +Currently under vote + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Swiddis + + +We have ~~7~~ 9 votes now, including me -- will wait for an admin to merge this based on https://github.com/opensearch-project/.github/issues/383 + + +--- + +# PR #4256: [Backport 2.19-dev] Dynamic source selector in PPL Grammar. + +**URL:** https://github.com/opensearch-project/sql/pull/4256 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-09T20:00:55Z + +**State:** MERGED + +**Merged:** 2025-09-09T20:39:15Z + +**Changes:** +225 -0 (4 files) + + +## Description + +Backport 1e8b2892c4bada2f83dfc753ab9d45f5989ad8a7 from #4116. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4253: [Backport 2.19-dev] Add support for `median()` + +**URL:** https://github.com/opensearch-project/sql/pull/4253 + +**Author:** @aalva500-prog + +**Created:** 2025-09-08T22:09:45Z + +**State:** MERGED + +**Merged:** 2025-09-10T21:35:55Z + +**Changes:** +125 -31 (8 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/f244bd6161ef8261fd222c93089c68d989e46f7e from https://github.com/opensearch-project/sql/pull/4234. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4250: [Backport 2.19-dev] Add wildcard support for rename command (#4019) + +**URL:** https://github.com/opensearch-project/sql/pull/4250 + +**Author:** @ritvibhatt + +**Created:** 2025-09-08T19:32:05Z + +**State:** MERGED + +**Merged:** 2025-09-16T21:44:59Z + +**Changes:** +730 -41 (11 files) + + +## Description + +(cherry picked from commit ab5f21a9eb82d511dff8adec383930d135930295) + +Added ```fields``` to queries in new rename tests to fix failing tests due to ordering inconsistencies because of different Java versions (mentioned in comments in #4122) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4246: Support ISO8601-formatted string in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4246 + +**Author:** @yuancu + +**Created:** 2025-09-08T09:04:57Z + +**State:** MERGED + +**Merged:** 2025-09-17T02:55:41Z + +**Changes:** +90 -24 (7 files) + +**Labels:** `bug`, `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +PPL has been adopting `YYYY-MM-DD HH:mm:ss[.SSSSSSSSS]` as its default format for timestamp. However, OpenSearch's default timestamp format is ISO 8601. This may cause confusion for PPL customers as shown in #4188 . + +With this PR, we extend the support of timestamp format to ISO 8601. An example of such format is `2025-09-08T06:34:00Z`. It also brings an additional benefit in that users can specify a timestamp with zone offset like `2025-09-07T18:34:00-12:00`. This allows users to input the data in a format that is easier to interpret. 
+ +### Related Issues +Resolves #4188 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +@yuancu Can you add at least one IT for your changes? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @yuancu + + +> @yuancu Can you add at least one IT for your changes? + +Thanks for reminding. Added. + + +--- + +# PR #4245: Support serializing & deserializing UDTs when pushing down scripts + +**URL:** https://github.com/opensearch-project/sql/pull/4245 + +**Author:** @yuancu + +**Created:** 2025-09-08T07:11:43Z + +**State:** MERGED + +**Merged:** 2025-09-23T22:20:25Z + +**Changes:** +1546 -12 (18 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Before this PR, pushed-down scripts does not work when there are arguments with user-defined types (UDTs). For example, the following simple script does not work if pushdown is enabled: `source=bank | eval t = unix_timestamp(birthdate) | stats count() by t` + +
    Error response + +```json +{ + "error": { + "reason": "Error occurred in OpenSearch engine: all shards failed", + "details": "Shard[0]: org.opensearch.sql.exception.ExpressionEvaluationException: invalid to get doubleValue from value of type STRING\n\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", + "type": "SearchPhaseExecutionException" + }, + "status": 500 +} +``` + +
    + +The problem arises from the serialization process. The serialization of UDTs is processed by Calcite's `RelJson` serializer. When serializing a UDT, it only keeps its `SqlTypeName` for future restoration. However, as many of UDTs are mapped to `SqlTypeName.VARCHAR`, all UDTs are restored as a VARCHAR type instead of the original UDT. + +This PR fixes the issue by implementing custom logic when serializing & de-serializing UDTs. + +### Related Issues +Resolves #4063 , resolves #4322, and resolves #4340 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1207` + + +This test fails after merging with the latest main branch: it returns 7 rows instead of 3. + +It seems has something to do with aggregation push-down. Created issue #4296 to track it. Removed `head` from this test case and created another for this case. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java:None` + + +Is the pushed-down generated code the same as that before pushdown? I notice we probably use the Calcite default BinaryImplementor to generate the comparison code. The primitive it compares depends on the reflected type. That's a possible root cause because we're not sure if the reflected UDT is the same as before. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java:None` + + +The data type differs, the rest, including the comparison code, remains the same. 
+ +For example, for this query: `source = lineitem | where l_commitdate < l_receiptdate` + +
    + Plan without this PR + +```json +{ + "calcite": { + "logical": """LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(l_receiptdate=[$0], l_returnflag=[$1], l_tax=[$2], l_shipmode=[$3], l_suppkey=[$4], l_shipdate=[$5], l_commitdate=[$6], l_partkey=[$7], l_orderkey=[$8], l_quantity=[$9], l_comment=[$10], l_linestatus=[$11], l_extendedprice=[$12], l_linenumber=[$13], l_discount=[$14], l_shipinstruct=[$15]) + LogicalFilter(condition=[<($6, $0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, lineitem]]) +""", + "physical": """CalciteEnumerableIndexScan(table=[[OpenSearch, lineitem]], PushDownContext=[[PROJECT->[l_receiptdate, l_returnflag, l_tax, l_shipmode, l_suppkey, l_shipdate, l_commitdate, l_partkey, l_orderkey, l_quantity, l_comment, l_linestatus, l_extendedprice, l_linenumber, l_discount, l_shipinstruct], SCRIPT-><($6, $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGdnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsX3JlY2VpcHRkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9yZXR1cm5mbGFnIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiRE9VQkxFIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibF90YXgiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsX3NoaXBtb2RlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibF9zdXBwa2V5IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9zaGlwZGF0ZSIKICAgIH0sCiAgICB7CiAgICAgICJ0e
XBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxfY29tbWl0ZGF0ZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfcGFydGtleSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfb3JkZXJrZXkiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJET1VCTEUiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJsX3F1YW50aXR5IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9jb21tZW50IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9saW5lc3RhdHVzIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiRE9VQkxFIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibF9leHRlbmRlZHByaWNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfbGluZW51bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfZGlzY291bnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsX3NoaXBpbnN0cnVjdCIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADKewogICJvcCI6IHsKICAgICJuYW1lIjogIjwiLAogICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiA2LAogICAgICAibmFtZSI6ICIkNiIKICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAAEHQADWxfcmVjZWlwdGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwc
kNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3QAD0xqYXZhL3V0aWwvTWFwO3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAJVElNRVNUQU1QfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEnQABERhdGVzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABl4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABsAAAAAc3EAfgAAAAAAAXcEAAAAAHh0AAxsX3JldHVybmZsYWd+cQB+ABF0AAZTVFJJTkd0AAVsX3RheH5xAH4AEXQABkRPVUJMRXQACmxfc2hpcG1vZGVxAH4AIHQACWxfc3VwcGtleX5xAH4AEXQABExPTkd0AApsX3NoaXBkYXRlc3EAfgAKcQB+ABNxAH4AFnEAfgAacQB+AB50AAxsX2NvbW1pdGRhdGVzcQB+AApxAH4AE3EAfgAWcQB+ABpxAH4AHnQACWxfcGFydGtleXEAfgAndAAKbF9vcmRlcmtleXEAfgAndAAKbF9xdWFudGl0eXEAfgAjdAAJbF9jb21tZW50c3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3EAfgAPeHEAfgAMfnEAfgARdAAHVU5LTk9XTn5xAH4AFXQABFRleHRxAH4AGnNxAH4AAAAAAAN3BAAAAAB4dAAMbF9saW5lc3RhdHVzcQB+ACB0AA9sX2V4dGVuZGVkcHJpY2VxAH4AI3QADGxfbGluZW51bWJlcn5xAH4AEXQAB0lOVEVHRVJ0AApsX2Rpc2NvdW50cQB+ACN0AA5sX3NoaXBpbnN0cnVjdHEAfgAgeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1758005112179638000}},"boost":1.0}},"_source":{"includes":["l_receiptdate","l_returnflag","l_tax","l_shipmode","l_suppkey","l_shipdate","l_commitdate","l_partkey","l_orderkey","l_quantity","l_comment","l_linestatus","l_extendedprice","l_linenumber","l_discount","l_shipinstruct"],"excludes":[]},"sort":[{"_doc":{"order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) +""" + } +} +``` + +
    + +
    + Plan with this PR + +```json +{ + "calcite": { + "logical": """LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(l_receiptdate=[$0], l_returnflag=[$1], l_tax=[$2], l_shipmode=[$3], l_suppkey=[$4], l_shipdate=[$5], l_commitdate=[$6], l_partkey=[$7], l_orderkey=[$8], l_quantity=[$9], l_comment=[$10], l_linestatus=[$11], l_extendedprice=[$12], l_linenumber=[$13], l_discount=[$14], l_shipinstruct=[$15]) + LogicalFilter(condition=[<($6, $0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, lineitem]]) +""", + "physical": """CalciteEnumerableIndexScan(table=[[OpenSearch, lineitem]], PushDownContext=[[PROJECT->[l_receiptdate, l_returnflag, l_tax, l_shipmode, l_suppkey, l_shipdate, l_commitdate, l_partkey, l_orderkey, l_quantity, l_comment, l_linestatus, l_extendedprice, l_linenumber, l_discount, l_shipinstruct], SCRIPT-><($6, $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQG03sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9yZWNlaXB0ZGF0ZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxfcmV0dXJuZmxhZyIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfdGF4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9zaGlwbW9kZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfc3VwcGtleSIKICAgIH0sCiAgICB7CiAgICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNp
c2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsX3NoaXBkYXRlIgogICAgfSwKICAgIHsKICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxfY29tbWl0ZGF0ZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfcGFydGtleSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfb3JkZXJrZXkiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJET1VCTEUiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJsX3F1YW50aXR5IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9jb21tZW50IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibF9saW5lc3RhdHVzIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiRE9VQkxFIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibF9leHRlbmRlZHByaWNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfbGluZW51bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImxfZGlzY291bnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsX3NoaXBpbnN0cnVjdCIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADKewogICJvcCI6IHsKICAgICJuYW1lIjogIjwiLAogICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiA2LAogICAgICAibmFtZSI6ICIkNiIKICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAAEHQADWxfcmVjZWlwdGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZh
L3V0aWwvTGlzdDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3QAD0xqYXZhL3V0aWwvTWFwO3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAJVElNRVNUQU1QfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEnQABERhdGVzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABl4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABsAAAAAc3EAfgAAAAAAAXcEAAAAAHh0AAxsX3JldHVybmZsYWd+cQB+ABF0AAZTVFJJTkd0AAVsX3RheH5xAH4AEXQABkRPVUJMRXQACmxfc2hpcG1vZGVxAH4AIHQACWxfc3VwcGtleX5xAH4AEXQABExPTkd0AApsX3NoaXBkYXRlc3EAfgAKcQB+ABNxAH4AFnEAfgAacQB+AB50AAxsX2NvbW1pdGRhdGVzcQB+AApxAH4AE3EAfgAWcQB+ABpxAH4AHnQACWxfcGFydGtleXEAfgAndAAKbF9vcmRlcmtleXEAfgAndAAKbF9xdWFudGl0eXEAfgAjdAAJbF9jb21tZW50c3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3EAfgAPeHEAfgAMfnEAfgARdAAHVU5LTk9XTn5xAH4AFXQABFRleHRxAH4AGnNxAH4AAAAAAAN3BAAAAAB4dAAMbF9saW5lc3RhdHVzcQB+ACB0AA9sX2V4dGVuZGVkcHJpY2VxAH4AI3QADGxfbGluZW51bWJlcn5xAH4AEXQAB0lOVEVHRVJ0AApsX2Rpc2NvdW50cQB+ACN0AA5sX3NoaXBpbnN0cnVjdHEAfgAgeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1758005410734926000}},"boost":1.0}},"_source":{"includes":["l_receiptdate","l_returnflag","l_tax","l_shipmode","l_suppkey","l_shipdate","l_commitdate","l_partkey","l_orderkey","l_quantity","l_comment","l_linestatus","l_extendedprice","l_linenumber","l_discount","l_shipinstruct"],"excludes":[]},"sort":[{"_doc":{"order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) +""" + } 
+} +``` +
    + +The [above comment](https://github.com/opensearch-project/sql/pull/4245#issuecomment-3268673999) shows part of the decoded script, their main difference lies in operand types. + +I guess part of the reason that the UDT comparison is not supported is that we are not using `PPLFuncImpTable` when resolving functions in the script? + +```java +# RelJsonSerializer.java#L54 +private static final SqlOperatorTable pplSqlOperatorTable = + SqlOperatorTables.chain( + PPLBuiltinOperators.instance(), + SqlStdOperatorTable.instance(), + // Add a list of necessary SqlLibrary if needed + SqlLibraryOperatorTableFactory.INSTANCE.getOperatorTable( + SqlLibrary.MYSQL, SqlLibrary.BIG_QUERY, SqlLibrary.SPARK, SqlLibrary.POSTGRESQL)); +``` + +Btw, we did not implement any special comparison operator for date time UDTs like [that for IP](https://github.com/opensearch-project/sql/blob/main/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java). But it has been working well somehow with `BinaryImplementor`: + +```java +// RexImpTable.java#L3181 +final Type type0 = argValueList.get(0).getType(); +final Type type1 = argValueList.get(1).getType(); +final SqlBinaryOperator op = (SqlBinaryOperator) call.getOperator(); +final RelDataType relDataType0 = call.getOperands().get(0).getType(); +``` + +`type0` and `type1` are resolved to VARCHAR for date time UDTs here. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java:None` + + +I pulled the code and checked the example ppl. I think I figured out the root cause. The obvious error happens when Calcite code generation cannot find the less than function signature by provided Linq4j Expressions. 
+See this error: +``` +RuntimeException[while resolving method 'lt[class java.lang.Object, class java.lang.Object]' in class class org.apache.calcite.runtime.SqlFunctions] +``` +Calcite `lt(.., ...)` method doesn't have such signature while resolving our UDT. See Calcite code: https://github.com/apache/calcite/blob/main/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java#L2183-L2248 + +When we hardcode the UDT type as STRING, it apparently generates such code for runtime evaluation. We can see that it's STRING for sure. +``` +public Object[] apply(Object root0) { + final String input_value = ((org.apache.calcite.DataContext) root0).get("l_commitdate") == null ? null : ((org.apache.calcite.DataContext) root0).get("l_commitdate").toString(); + final String input_value0 = ((org.apache.calcite.DataContext) root0).get("l_receiptdate") == null ? null : ((org.apache.calcite.DataContext) root0).get("l_receiptdate").toString(); + return new Object[] { + input_value == null || input_value0 == null ? null : Boolean.valueOf(org.apache.calcite.runtime.SqlFunctions.lt(input_value, input_value0))}; +} +``` + +Then what caused this wrong method resolution if we set the field as ExprSqlType.Timestamp UDT? From the previous normal generated code, we can see it assigns the `input_value` and `input_value0` to the type `String`. In our case, it has to be assigned to `Object`. It means we resolved the wrong Java class for UDT. This reminds me that during compilation of script code, we might used incorrect type factory for input because we just simply copied Calcite code. + +This line causes the issue: https://github.com/opensearch-project/sql/blob/main/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java#L131-L132. It uses default JavaTypeFactory instead of our own OpenSearchTypeFactory that tunes UDT logic. 
Our OpenSearchTypeFactory overrides the `getJavaClass` method to resolve UDT's Java class: https://github.com/opensearch-project/sql/blob/main/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java#L323-L329 + +So I think a simple fix is to remove the problematic line and pass OpenSearchTypeFactory.TYPE_FACTORY to our +`ScriptInputGetter`. We also need to make sure this does not impact other cases. + + + +### @songkant-aws on `opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJsonTest.java:63` + + +[Minor and Non-blocking] It's better to add some cases of serializing and deserializing nested structure like List[String, UDT_TIMESTAMP, UDT_IP] or Map{(key1, Integer), (key2, UDT_TIMESTAMP) } + + +### @yuancu on `opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJsonTest.java:63` + + +Thanks for the suggestion! Tests added + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java:None` + + +Thank you for helping debug this! 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:705` + + +can you add the IT: +``` +source=%s | eval t = date_add(birthdate, interval 1 day) | stats count() by span(t, 1d) +``` + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1202` + + +ditto + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:705` + + +Added + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:1202` + + +Added + + +## General Comments + + +### @yuancu + + +Another issue arises after supporting serializing & pushing down UDTs: `testQ12` and `testQ21` in `CalcitePPLTpchIT` fail with reason: `NoSuchMethodException[org.apache.calcite.runtime.SqlFunctions.lt(java.lang.Object,java.lang.Object)]` + +Before this PR, UDTs were pushed down as strings, so it went for `SqlFunctions.lt(java.lang.String,java.lang.String)` when resolving date comparison. Now that they have become UDTs, it cannot resolve UDT date comparison to a proper implementation. 
+ +For example, Q12 query contains timestamp comparison: + +``` +source = orders +| join ON o_orderkey = l_orderkey lineitem +| where l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_shipmode in ('MAIL', 'SHIP') + and l_receiptdate >= date('1994-01-01') + and l_receiptdate < date_add(date('1994-01-01'), interval 1 year) +| stats sum(case(o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH', 1 else 0)) as high_line_count, + sum(case(o_orderpriority != '1-URGENT' and o_orderpriority != '2-HIGH', 1 else 0)) as low_line_count + by l_shipmode +| fields l_shipmode, high_line_count, low_line_count +| sort l_shipmode +``` + +Before this PR, the comparison script is like below: +```json +rowTypet: { + "fields": [ + { + "type": "VARCHAR", + "nullable": true, + "precision": -1, + "name": "l_receiptdate" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": -1, + "name": "l_shipmode" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": -1, + "name": "l_commitdate" + }, +... + ], + "nullable": false + + "op": { + "name": "<", + "kind": "LESS_THAN", + "syntax": "BINARY" + }, +... +} +``` + +With this PR, the type becomes UDT: + +```json +rowTypet:{ + "fields": [ + { + "udt": "EXPR_TIMESTAMP", + "type": "VARCHAR", + "nullable": true, + "precision": -1, + "name": "l_receiptdate" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": -1, + "name": "l_shipmode" + }, + { + "udt": "EXPR_TIMESTAMP", + "type": "VARCHAR", + "nullable": true, + "precision": -1, + "name": "l_commitdate" + }, + ... + } + ], + "nullable": false +}, + "op": { + "name": "<", + "kind": "LESS_THAN", + "syntax": "BINARY" + }, +... + ] +} +``` + +The problem arises from `where l_commitdate < l_receiptdate and l_shipdate < l_commitdate`. It has problem resolving comparing UDT. 
In calcite without push-down enabled, timestamp UDT comparison is resolved to string comparison since the underlying type of `EXPR_TIMESTAMP` is a `RexLiteral` storing a string constant (see `RexToLibTranslator.java#L1250`). I don't know why it does not work when UDT is pushed down. + +Working on fixing it. + + +--- + +# PR #4244: [Backport 2.19-dev] Implement `Append` command with Calcite (#4123) + +**URL:** https://github.com/opensearch-project/sql/pull/4244 + +**Author:** @songkant-aws + +**Created:** 2025-09-08T03:41:42Z + +**State:** MERGED + +**Merged:** 2025-09-10T15:26:17Z + +**Changes:** +942 -1 (20 files) + + +## Description + +* Implement Append Command + + + +* Fix spotless check + + + +* Rephrase append.rst + + + +* Support subsearch different index for append command + + + +* Fix some tests and add cross cluster IT + + + +* Not support empty subsearch input for now + + + +* Fix doctest + + + +* Support empty source edge case + + + +* Fix anonymizer tests + + + +* Add missing test cases for nested join or lookup command in appended subsearch + + + +* Fix compile issue + + + +--------- + +### Description +Manual backport #4123 into 2.19-dev + +### Related Issues + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@songkant-aws please resolve the conflicts + + +--- + +# PR #4243: [Backport 2.19-dev] Support pushdown sort by simple expressions (#4071) + +**URL:** https://github.com/opensearch-project/sql/pull/4243 + +**Author:** @songkant-aws + +**Created:** 2025-09-08T03:01:54Z + +**State:** MERGED + +**Merged:** 2025-09-11T16:31:03Z + +**Changes:** +878 -1 (12 files) + + +## Description + +* Support pushdown sort by simple expressions + + + +* Fix IT for no pushdown case + + + +* Add minor case to allow sort pushdown for casted floating number + + + +* Fix the issue of using wrong fromCollation + + + +* Add some unit tests for OpenSearchRelOptUtil + + + +* Fix checkstyle + + + +--------- + +### Description +Manual backport #4071 into 2.19-dev + +### Related Issues + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4241: [Feature] Implementation of mode `sed` and `offset_field` in rex PPL command + +**URL:** https://github.com/opensearch-project/sql/pull/4241 + +**Author:** @RyanL1997 + +**Created:** 2025-09-08T02:06:01Z + +**State:** MERGED + +**Merged:** 2025-09-11T16:14:35Z + +**Changes:** +507 -12 (15 files) + +**Labels:** `PPL`, `feature`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Implementation of mode sed and offset_field in rex PPL command + +### Related Issues +* Resolve #4108 +* Related #4109 + +### Design Details +For the implementation of these 2 command extensions, here are the implementation choice I made: + +#### 1. Mode `sed` - Complete native Calcite optimization +- `REGEXP_REPLACE_PG_3` for basic substitution +- `REGEXP_REPLACE_PG_4` for flagged substitution (`g`, `i`) +- `REGEXP_REPLACE_5` for nth occurrence +- `TRANSLATE3` for transliteration (`y/from/to/`) + +#### 2. `offset_field` - custom UDF `REX_OFFSET` + +- **What's our `REX_OFFSET` does**: + - Takes: (text, pattern) + - Supports multiple named capture groups: (?...)(?...) 
+ - Returns: Complex string format like "name1=start1-end1&name2=start2-end2" + - Provides both start AND end positions for each group + - Meets the functionality requirements + +- **What Calcite's `REGEXP_INSTR` does ([Calcite Doc](https://calcite.apache.org/javadocAggregate/org/apache/calcite/runtime/SqlFunctions.RegexFunction.html#regexpInstr(java.lang.String,java.lang.String)))**: + - Takes: (text, pattern, position, occurrence, occurrencePosition) + - Only works with single capture group (throws error if multiple groups exist) + - Returns: Single integer (either start or end position, not both) + - Cannot handle multiple groups or complex output formats + +**Key Blocking Issues For NOT able to replace `REX_OFFSET` with `REGEXP_INSTR`:** +- **Multiple Group Limitation:** +```java + // Calcite's REGEXP_INSTR calls this internally: + checkMultipleCapturingGroupsInRegex(matcher, "REGEXP_INSTR"); + // Throws exception if matcher.groupCount() > 1 +``` +- **Output Format Mismatch:** + - `REX_OFFSET`: "group1=5-10&group2=15-20" (structured string) + - `REGEXP_INSTR`: 15 (single integer position) +- **Different Functionality:** + - `REX_OFFSET`: Returns start-end ranges for all named groups + - `REGEXP_INSTR`: Returns single position (start OR end) for single group + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +## Review Comments + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +np: `mode=(EXTRACT | SED)` ? + + +### @dai-chen on `docs/user/ppl/cmd/rex.rst:None` + + +This option only works for extract mode or both? 
+ + +### @dai-chen on `docs/user/ppl/cmd/rex.rst:122` + + +`domain` is second capture group in regex but comes first here? Is it by design? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/udf/RexOffsetFunction.java:None` + + +Probably capture the specific exception type? So we return null if any error? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Could you elaborate this design in PR description? Also where is the REX_SED and fallback logic? I may miss it somewhere + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Good catch, I should actually remove this comment, and here is the reason: + +In the previous design version I developed, I created an UDF called `REX_SED`, however, I have completely removed these logic, and start using all the native calcite sql functions for the entire SED feature. +- Here is the original comment I discussed with @penghuo : https://github.com/opensearch-project/sql/pull/4109#discussion_r2305922467 +- Here is the related investigation I did in my original RFC for the usage of UDF: https://github.com/opensearch-project/sql/issues/4108#issuecomment-3231105679 + +But yes, let me transfer some of the above info the PR description and remove this misleading comment. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +fixed. + + +### @RyanL1997 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +fixed. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:122` + + +Good observation and yes, this behavior aligns with the requirement. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +This is a very good question. I have rechecked the requirement also, and confimed`offset_field` should be only working on the extraction mode. 
I added an clear exception msg: + +```bash +❯ curl -X POST "localhost:9200/_plugins/_ppl" -H 'Content-Type: application/json' -d'{ + "query": "source=accounts | rex field=email mode=sed offset_field=email \"s/@.*/@newcompany.com/\" | fields firstname, email | head 5" + }' | jq + +{ + "error": { + "reason": "Invalid Query", + "details": "Rex command: offset_field cannot be used with mode=sed. The offset_field option is only supported in extract mode.", + "type": "IllegalArgumentException" + }, + "status": 400 +} +``` + +And also I have added a test, and updated the doc. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Btw I have also updated the PR description with a section of "Design Details". + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/udf/RexOffsetFunction.java:None` + + +Good point. I checked the logic, exceptions can happen should be `PatternSyntaxException` - When regex pattern compilation fails. + +So that I have also updated the above logic, and improve the better error handling instead of silently returning `null`. Here is an example: + +```bash +curl -X POST "localhost:9200/_plugins/_ppl" -H 'Content-Type: application/json' -d'{ + "query": "source=accounts | rex field=email \"(?[^@)+@(?.+)\" offset_field=pos | fields email, pos | head 1" + }' + +{ + "error": { + "reason": "Invalid Query", + "details": "Invalid regex pattern in rex command: Unclosed character class near index 26\n(?[^@)+@(?.+)", + "type": "IllegalArgumentException" + }, + "status": 400 +}% +``` +And this also aligns with the requirement. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2159` + + +np: If sed/regex is common across different commands, it may worth creating dedicated AST expression nodes and separate these into its own visit method in the future. 
+ + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:122` + + +Actually, checked with @dai-chen offline, it should be ordered alphabetically :/ +I have changed into something like the following. + +```bash +curl -X POST "localhost:9200/_plugins/_ppl" -H 'Content-Type: application/json' -d'{ + "query": "source=accounts | rex field=email \"(?[a-zA-Z0-9._%+-]+)@(?[a-zA-Z0-9.-]+)\\.(?[a-zA-Z]{2,})\" offset_field=positions | fields email, user, domain, tld, positions | head 3" + }' | jq + +{ + "schema": [ + { + "name": "email", + "type": "string" + }, + { + "name": "user", + "type": "string" + }, + { + "name": "domain", + "type": "string" + }, + { + "name": "tld", + "type": "string" + }, + { + "name": "positions", + "type": "string" + } + ], + "datarows": [ + [ + "amberduke@pyrami.com", + "amberduke", + "pyrami", + "com", + "domain=10-15&tld=17-19&user=0-8" + ], + [ + "hattiebond@netagy.com", + "hattiebond", + "netagy", + "com", + "domain=11-16&tld=18-20&user=0-9" + ], + [ + "nanettebates@quility.com", + "nanettebates", + "quility", + "com", + "domain=13-19&tld=21-23&user=0-11" + ] + ], + "total": 3, + "size": 3 +} +``` + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4239: [Backport 2.19-dev] Core Implementation of `rex` Command In PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4239 + +**Author:** @RyanL1997 + +**Created:** 2025-09-05T23:28:11Z + +**State:** MERGED + +**Merged:** 2025-09-09T22:17:01Z + +**Changes:** +1624 -4 (31 files) + + +## Description + +### Description +Manual backport + +### Related Issues +* Relate https://github.com/opensearch-project/sql/pull/4109 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +I saw the doc tests failed because the sample data used for doctest is different than the one we used on main. + + +--- + +# PR #4238: [Backport 2.19-dev] Enable pushdown optimization for filtered aggregation + +**URL:** https://github.com/opensearch-project/sql/pull/4238 + +**Author:** @dai-chen + +**Created:** 2025-09-05T22:27:14Z + +**State:** MERGED + +**Merged:** 2025-09-08T19:04:45Z + +**Changes:** +491 -17 (11 files) + +**Labels:** `enhancement`, `PPL` + +**Assignees:** @dai-chen + + +## Description + +### Description + +Backport https://github.com/opensearch-project/sql/commit/a1cb71f584ca88d86ce26317927ee9399d060cbf. from https://github.com/opensearch-project/sql/pull/4213. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4237: [Backport] Fast tests for 2.19-dev + +**URL:** https://github.com/opensearch-project/sql/pull/4237 + +**Author:** @Swiddis + +**Created:** 2025-09-05T22:13:26Z + +**State:** MERGED + +**Merged:** 2025-09-05T23:36:34Z + +**Changes:** +2954 -2596 (31 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +The workflow file had a nasty merge conflict, so I just replaced it with the new one and fixed the java versioning. + +Combination of #4190, #4222, #4193, w/ 2.19-specific tweaks. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4234: Add support for `median()` + +**URL:** https://github.com/opensearch-project/sql/pull/4234 + +**Author:** @aalva500-prog + +**Created:** 2025-09-05T05:58:00Z + +**State:** MERGED + +**Merged:** 2025-09-08T21:11:07Z + +**Changes:** +123 -29 (8 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description +PPL lacks `median` function `median()` in stats aggregation functions. + +This PR implements the support of the `median` shortcut and rewrite it to `perc50()`, which is already supported with pushdown optimization. + +`median()` and `percentile50()` are functionally equivalent - they both return the 50th percentile value of the specified field. + +However, there are some subtle differences: + +- `median()` is specifically designed for finding the middle value and may have slight performance optimizations for this exact use case. +- percentile50() is part of the more general percentile function family, which can calculate any percentile (1-99) + +In practice, both will give you the same result for the 50th percentile calculation. The choice between them is often a matter of code readability and intent - `median()` makes it immediately clear you want the median value, while `percentile50()` shows you're working with percentile calculations. + +### Related Issues +Resolves #[4200] + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @Swiddis - DISMISSED + + +Agree with Chen that an AST-level rewrite might be cleaner, but I don't have a strong preference either way. + + +### @RyanL1997 - COMMENTED + + +Hi @aalva500-prog , thanks for the change, and I just left some comments. + + +### @RyanL1997 - APPROVED + + +LGTM. Thanks for taking this on @aalva500-prog . + + +## Review Comments + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1127` + + +I'm thinking what's the pros and cons between registering a new function vs. rewriting in AST layer? + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +nit: `List.of`? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +nit: Consider extracting 50.0 as a constant. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Should there be `null` checking for `field.getType()`? What happens if the field type is `null`? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1129` + + +Question: The `distinct` and `argList` parameters are not used. Should there be validation that: + +- `distinct` is false (since `MEDIAN` doesn't support `DISTINCT`)? +- `argList` is empty (since `MEDIAN` takes no additional arguments)? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Also, just for my knowledge: why MEDIAN is implemented as `PERCENTILE_APPROX(field, 50.0)`? + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1127` + + +Hi @dai-chen, please find below why I chose registering a new function instead of rewriting it in AST layer: + +- MEDIAN is a legitimate aggregate function, not syntax sugar. 
Function registration keeps it in the proper semantic layer where aggregate functions belong, maintaining clean separation between function logic and query planning infrastructure. + +- Consistency: All other aggregate functions (AVG, COUNT, SUM) use function registration. IMO, MEDIAN should follow the same pattern for code consistency. + +- Function registration provides automatic type checking, error handling, and validation through the existing PPLTypeChecker framework. AST rewriting would require manually implementing these safeguards or risk runtime failures. + +- Maintainability: All functions logic stays in one location, so changes, bug fixes, or enhancements are contained and don't require touching multiple planning components. AST rewriting scatters the logic across visitors, rules, and planners. + +I understand AST rewriting could be preferred when performance optimization is critical, however, I don't think in this specific case there is a great difference, but I could be wrong. That being said, please let me know if you think the current implementation can cause a significant performance bottleneck and I should be using AST rewriting approach instead. Thanks! + + + + + + + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Good catch! If `field.getType()` returns `null`, calling `getSqlTypeName()` on it would throw a `NullPointerException`. This affects both `MEDIAN` and `PERCENTILE_APPROX` functions, btw. + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Good suggestion! Let me extract the 50.0 value as a constant. + +To answer your question, `MEDIAN` is implemented as `PERCENTILE_APPROX(field, 50.0)` because: + +1) Mathematical equivalence: The `median` is by definition the `50th percentile` - the value that splits the data in half. 
+ +2) Code reuse: Instead of implementing a separate median algorithm, I'm leveraging the existing `PERCENTILE_APPROX` function which already handles: + +- Approximate percentile calculations (efficient for large datasets) +- Type handling and validation +- Integration with the underlying execution engine + +3) Performance: PERCENTILE_APPROX uses an approximate algorithm (like t-digest) that's much faster than exact median calculation, especially for large datasets where you'd need to sort all values. + +4) Consistency: Both functions will behave the same way in terms of approximation accuracy and performance characteristics. Adding to this point, if you look into the `BuiltinFunctionName` class, the mapping for the `percentile` function also uses the `PERCENTILE_APPROX` function: + +``` + .put("percentile", BuiltinFunctionName.PERCENTILE_APPROX) +``` + +In general, the "APPROX" part means it's trading some precision for speed - perfectly acceptable for most analytical use cases where you need fast results on large datasets rather than exact values. + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1129` + + +You're absolutely right, the `distinct` and `argList` parameters are not used in the implementation, and there should be validation to ensure they meet the expected constraints for MEDIAN. Let me add the necessary validation, thanks! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Good catch! I'll simplify to use `List.of()`. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1127` + + +I think the main benefit of current approach is exposing the same function set in SQL. We can discuss more later. Thanks! 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4233: Add experimental annotations to direct-query and direct-query-core + +**URL:** https://github.com/opensearch-project/sql/pull/4233 + +**Author:** @lezzago + +**Created:** 2025-09-04T22:02:00Z + +**State:** MERGED + +**Merged:** 2025-09-09T21:41:15Z + +**Changes:** +165 -5 (51 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Add experimental annotations to direct-query and direct-query-core since these will be experimental features. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @noCharger on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/ExecuteDirectQueryRequest.java:20` + + +nit: Place the comment before any annotations. Same as the rest. + + +### @Swiddis on `direct-query-core/src/main/java/org/opensearch/sql/directquery/rest/model/ExecuteDirectQueryRequest.java:20` + + +^ this seems like something spotless should be able to check/apply, if it's important let's make an issue for that. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4232: [Backport 2.19-dev] Support timechart command with Calcite (#3993) + +**URL:** https://github.com/opensearch-project/sql/pull/4232 + +**Author:** @selsong + +**Created:** 2025-09-04T20:30:50Z + +**State:** MERGED + +**Merged:** 2025-09-05T20:04:30Z + +**Changes:** +1901 -2 (25 files) + + +## Description + +### Description +Backport #3993 timechart PR into 2.19-dev branch +manual backport due to conflict + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4228: Push down limit operator into aggregation bucket size + +**URL:** https://github.com/opensearch-project/sql/pull/4228 + +**Author:** @qianheng-aws + +**Created:** 2025-09-04T18:34:13Z + +**State:** MERGED + +**Merged:** 2025-09-10T19:46:14Z + +**Changes:** +197 -41 (24 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Push down limit operator into aggregation bucket size. Also adjusting the digest of limit push down to include `offset` information as well. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3961 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:None` + + +Is it accurate? 
should be push down the limit into the aggregation bucket? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:301` + + +if hasAggregatorInSortBy, limit should not been pushed +`source=log00001 | stats sum(age) as age by vi | sort age | head 1`, limit 1 should not pushed to composite-aggregation. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:301` + + +In the DSL of composite aggregation, the sort operation is executed previous to size cutting. That's exactly the execution sequence we need for this PPL. + +See the test case in `StatsCommandIT::testStatsWithLimit`, if we add test case like: +``` +source=%s | stats avg(balance) as a by age | sort - age | head 5 +``` +The final results is: +``` +... + "datarows": [ + [ + null, + 36 + ], + [ + 48086, + 34 + ], + [ + 4180, + 33 + ], + [ + 39225, + 32 + ], + [ + 32838, + 28 + ] + ], + "size": 5 +} +``` +It meets our expectation. + + +What's debatable is the case of PPL: +``` +source=%s | stats avg(balance) as a by age | head 5 | sort - age +``` +I think it's both OK whether push down sort after limit since we don't ensure a order for limit operator. So the output of limit operator is not ensured `idempotent `. But since we already has restriction in `OpenSearchSortIndexScanRule` to prevent push down sort after limit, the behavior can be align. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:301` + + +> source=log00001 | stats sum(age) as age by vi | sort age | head 1 + +This query will combine sort + limit into a single `LogicalSort` operator, and it will cause an issue by push down it as a limit directly. + +This is actually an robust bug already exist in `OpenSearchLimitIndexScanRule`. We need to check the collation before pushing down the `LogicalSort` as a limit. 
+ +Add tests for verifying the data and plan separately in the latest [commit](https://github.com/opensearch-project/sql/pull/4228/commits/1835f81e99ca4a4188279fd4bd6ebce4b76b893a). + + +## General Comments + + +### @LantaoJin + + +I saw some failures after this PR merged. ref https://github.com/opensearch-project/sql/actions/runs/17625084933/job/50079822319 @qianheng-aws + + +--- + +# PR #4226: [Backport 2.19-dev] Add support for `list()` multi-value stats function (#4161) + +**URL:** https://github.com/opensearch-project/sql/pull/4226 + +**Author:** @ps48 + +**Created:** 2025-09-04T17:16:13Z + +**State:** MERGED + +**Merged:** 2025-09-04T20:36:18Z + +**Changes:** +540 -0 (15 files) + +**Labels:** `enhancement` + + +## Description + +* Add support for list function + + + +* fix test and resolve comments + + + +* fix spotlesscheck + + + +* revert list() to UDAF + + + +* update tests + + + +* update tests and docs + + + +* apply spotless + + + +* remove order by test + + + +* Add a group by test case in eval + + + +* revert Optionality in UDF + + +(cherry picked from commit 0875affcd0f120cd9880d28c605e143d0477ac29) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4225: [Backport 2.19-dev] Pushdown earliest/latest aggregate functions (#4166) + +**URL:** https://github.com/opensearch-project/sql/pull/4225 + +**Author:** @ykmr1224 + +**Created:** 2025-09-04T16:56:34Z + +**State:** MERGED + +**Merged:** 2025-09-05T16:09:28Z + +**Changes:** +129 -12 (7 files) + + +## Description + +backport https://github.com/opensearch-project/sql/commit/2f8f75d0a543d36003bcf440cae1208208c91390 from https://github.com/opensearch-project/sql/pull/4166 + +(manual backport due to conflict) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + 
+ +### @ykmr1224 + + +I need to fix switch/case notation for Java 11. + + +### @ykmr1224 + + +Fixed Java 11 issue + + +--- + +# PR #4224: Support time modifiers in search command + +**URL:** https://github.com/opensearch-project/sql/pull/4224 + +**Author:** @yuancu + +**Created:** 2025-09-04T09:50:17Z + +**State:** MERGED + +**Merged:** 2025-09-30T17:58:23Z + +**Changes:** +1664 -121 (35 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `v3.3.0` + + +## Description + +### Description +Support time modifier in search command. + +Examples: + +``` +source=any earliest='2020-12-10' +source=any earliest='2020-12-10' latest='2025-09-10 13:00:12' +source=any latest=now +source=logs earliest=-7d +source=logs earliest='-1d@d' +``` + +Queries with earliest and latest time modifiers will be converted to a **query string query** with comparison conditions on the implicit timestamp field `@timestamp`. For example, query `source=time_test earliest=-1year latest='-50d@w3'` is converted to the following DSL: +```json +{ + "query": { + "query_string": { + "query": "(@timestamp:>=now-1y) AND (@timestamp:<=now-50d/w-5d)", + } + } +} +``` + +**Work items** + +### Related Issues +Resolves #4135 + +### Implementation Walk-through + +#### Implementation Details + + 1. Time Modifier Processing + + The core functionality is implemented in the `visitTimeModifierValue` and `visitTimeModifierExpression` methods in AstExpressionBuilder.java: + + - `visitTimeModifierValue`: Converts time values to OpenSearch date math expressions + - `visitTimeModifierExpression`: Creates comparison conditions for `@timestamp` field in query string + + 2. Time Format Conversion + + The PR extends DateTimeUtils.java with the parseRelativeTime method, which transforms PPL time expressions to OpenSearch date math expressions. + + 3. 
Query String Query Creation + + Time modifiers are converted to search comparisons with a query string on the implicit `@timestamp` field, which is now defined as a constant in OpenSearchConstants.java. For example, `search earliest=-10days@month latest=now()` is converted to a query string query like the following: + +```json +{ + "query": { + "query_string": { + "query": "(@timestamp:>=-10d/M) AND (@timestamp:<=now)" + } + } +} +``` + + + #### PPL Time Modifiers to OpenSearch Date Math Examples + +- Absolute Times + + | PPL Time Modifier | OpenSearch Date Math | Explanation | + |-----------------------|----------------------|----------------| + | '2023-01-01' | 2023-01-01 | Simple date | + | '2023-01-01 13:45:30' | 2023-01-01T13:45:30Z | Date with time | + +- Relative Times + + | PPL Time Modifier | OpenSearch Date Math | Explanation | + |-------------------|----------------------|-----------------| + | now | now | Current time | + | -30s | now-30s | 30 seconds ago | + | -1h | now-1h | 1 hour ago | + | +1d | now+1d | 1 day in future | + +- Snapping to Time Units (Using @) + + | PPL Time Modifier | OpenSearch Date Math | Explanation | + |-------------------|----------------------|---------------------------| + | -1d@d | now-1d/d | 1 day ago, rounded to day | + | @h | now/h | Round to current hour | + | @M | now/M | Round to current month | + +- Week Days + + | PPL Time Modifier | OpenSearch Date Math | Explanation | + |-------------------|----------------------|--------------------------------------------| + | @w0 | /w-1d or /w+6d | Previous Sunday (depending on current day) | + | @w1 | /w | Start of week (Monday) | + | @w4 | /w+3d | Thursday of current week | + +- Special Cases + + | PPL Time Modifier | OpenSearch Date Math | Explanation | + |-------------------|----------------------|-----------------------------------------| + | @q | /M or /M-1M or /M-2M | Start of current quarter | + | -2q | -6M | 2 quarters ago (6 months) | + | 1234.567 | 1234567 | Unix 
timestamp (seconds → milliseconds) | + +- Complex Expressions + + | PPL Time Modifier | OpenSearch Date Math | Explanation | + |-------------------|----------------------|-------------------------------------------------------| + | -1d+1y@mon | now-1d+1y/M | 1 day ago + 1 year, rounded to month | + | -3d@d-2h+10m | now-3d/d-2h+10m | 3 days ago, day start, minus 2 hours, plus 10 minutes | + + #### Limitations + +Currently, the following formats has to be quoted: + +- chained offsets. e.g. `-1day@month+1h` or `+1year-4day` + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EarliestFunction.java:69` + + +Modified because `earliest` should be *greater or equal*. + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Asking for help: currently, `AT timeModifierUnit` can not be matched. Since antlr is parsed bottom-up, plus that a shorter rule matching longer text will be prioritized, `AT timeModifierUnit` will always be matched as an identifier by the following rule: + +```antlr +// OpenSearchPPLLexer.g4 +ID: ID_LITERAL; +fragment ID_LITERAL: ([@*A-Z_])+?[*A-Z_\-0-9]*; +``` + +```antlr +// OpenSearchPPLParser.g4 +ident + : (DOT)? ID +``` + +As a result, I can not write something like `source=t earliest=-10h@day`, but have to write it as `source=t earliest='-10h@day'` because `-10h@day` will be matched to the shorter rule `ident`. + +I tried to make the grammar context aware, but had no luck with multiple solutions. Is there any suggestion for a workaround? 
+ + +### @yuancu on `core/build.gradle:None` + + +This dependency is added for the access of `org.opensearch.common.time.DateMathParser` -- I want to make sure that the parsed OpenSearch date math like `now-1d/M-2M` returns the intended instant for `-1d@q` + + +### @vamsimanohar on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +can you add these new terms to searchKeyWords or `keywordCanbeId` + + +### @vamsimanohar on `ppl/src/main/antlr/OpenSearchPPLParser.g4:125` + + +For my understanding: This implies we can write only timeModifier in searchExpression. +Can you update search.rst with these changes. + + +### @vamsimanohar on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +@yuancu have you figured out anything on this? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java:1010` + + +What if earliest is a field name? Should add backtick? + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:125` + + ++1, please update doc. + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:127` + + +do timemodifier support NOT? e.g. `search source=index NOT latest=now` + + +### @penghuo on `core/build.gradle:None` + + +parseRelativeTime() is called by PPL module only, right? if move parseRelativeTime to PPL module, does opensearch depedency still needed? + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +No, I haven't :/ + + +### @yuancu on `core/build.gradle:None` + + +Yes, it is only called by PPL module. If I still want to assure the correctness of the parsed instant, I'll have to move this dependency to the PPL module. + +Is there any concern over this dependency? If it's for the performance, as the dependency is only test time, I assume there would be no harm to the runtime performance. + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +Added `timeModifierUnit` to `searchableKeyWord`. 
I wonder how does this change the parsing behavior + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:127` + + +Yes. Time modifiers are converted conditions with `SearchComparison`. Any further operation that is applicable to search comparison, such as NOT, OR, AND, are applicable to time modifiers. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java:1010` + + +Yes, they can be accessed with backticks. I have added a test case for such scenarios and updated docs. + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:125` + + +Thanks for pointing out! The documentation is updated. + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Update: the problem is solved by creating a new lexer `TIME_SNAP` that matches `@ time_snap_unit` and placing it before `ID`. + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:70` + + +Is `relative_timestamp` correct place, can we move time modifiers to a separate md file? + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:53` + + +Just for clarification, `snap_unit` will always be start of what is specified and no relation to `now` right? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:None` + + +What is this handling? + + +### @vamsimanohar on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +You might need to rebase after https://github.com/opensearch-project/sql/pull/4334 + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:53` + + +can I got to start 2months before. + +`15m@2mon`? + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:905` + + +Is this an existing function? + + +### @yuancu on `docs/user/ppl/cmd/search.rst:53` + + +> Just for clarification, snap_unit will always be start of what is specified and no relation to now right? + +Span unit will be the start of what is specified. E.g. 
`@mon` align the time expression before it to the start of that month. + +In our implementation for week and quarter, it has some connection with the current time though. Because in opensearch datemath, there is no alignment to quarter, we have to manually calculate how many months to shift based on when is it now. For week, because the start of week is different in PPL and OpenSearch is difference, a now reference is also necessary to convert the week day shift. Please refer to `core/src/test/java/org/opensearch/sql/utils/DateTimeUtilsTest.java` for more details. + +> can I got to start 2months before. `15m@2mon`? + +Nope, you'll have to use something like `-15m-2month`, or `-15m-2month@month` if you want the alignment. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:None` + + +This is to make the existing [earliest and latest condition functions](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/condition.rst#earliest) also support formats like `YYYY-MM-DD HH:mm:ss` and `YYYY-MM-DDTHH:mm:ssZ`, it used to support only one format `MM/dd/yyyy:HH:mm:ss`, which is rather alien to PPL, compared with other date time functions. + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +Thanks, removed + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:905` + + +Nope, it is implemented from scratch in this PR. + + +### @yuancu on `docs/user/ppl/cmd/search.rst:70` + + +Actually, this points to a documentation of opensearch Spark project. I did so because the [earliest condition function](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/condition.rst#earliest) also points to this doc. I thought it's not bad to have a single source of truth. Or do you think it would be better if we have a replicate of it in our repo in a separate file. 
+ + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:None` + + +can we loop through formatters instead of deep nested. It will be easy to add more in future. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:None` + + +Yes that would be better. Thanks for the suggestion + + +## General Comments + + +### @vamsimanohar + + +can you review this: https://github.com/opensearch-project/sql/pull/4152 +I am changing the grammar for search expression and its functionality. +We might need to reevaluate the implementation. + + + +### @yuancu + + +Hi @vamsimanohar, thank you for reminding! I think the functionality does not overlap, I'll re-implement this based on your grammar. + + +### @vamsimanohar + + +@yuancu Few things to keep in mind. + +` +We fundamentally want search command to be always push down and translate to `query_string` function in DSL which follows Lucene query syntax. Anything we add to search command should get translated to lucene query. +` + +I think it should be possible with your changes. + + +### @vamsimanohar + + +@yuancu hey can you update the description with latest implementation where you are translating to Lucene query. + + +### @vamsimanohar + + +@yuancu Did we cover every usecase mentioned in the description? Lets try to close this PR by end of the day. I can stay late and get on a call with you for review. + +You probably might need to refactor from this PR:https://github.com/opensearch-project/sql/pull/4334 + + +### @yuancu + + +> @yuancu Did we cover every usecase mentioned in the description? Lets try to close this PR by end of the day. I can stay late and get on a call with you for review. +> +> You probably might need to refactor from this PR:#4334 + +Unit test and explaining IT covers all of them. I did not cover some relative time modifier in integration test because they either have something to do with the current time, which may be flaky to mock and test. 
I'll try to cover more of them in the integration test. E.g. there exists unknown latency after I write a timestamp and before the query is executed. + + +--- + +# PR #4223: Support first/last aggregate functions for PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4223 + +**Author:** @ahkcs + +**Created:** 2025-09-04T00:40:19Z + +**State:** MERGED + +**Merged:** 2025-09-10T16:14:37Z + +**Changes:** +869 -9 (15 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +

    Add FIRST() and LAST() aggregate functions

    +

    Resolves: #4203

    +

    Summary

    +

    This PR implements FIRST(field) and LAST(field) for PPL, returning the first and last values of a field in natural index (document) order.

    +
      +
    • +

      New Aggregates: FIRST(field), LAST(field)

      +
    • +
    • +

      Document Order Processing: Natural document order

      +
    • +
    • +

      OpenSearch Pushdown: Translates to top_hits with size: 1 for optimal performance

      +
    • +
    • +

      Calcite Integration: Implemented as Calcite UDAFs

      +
    • +
    + +

    Function Specs

    +

    Add first/last aggregate function for PPL (Calcite)

    +

    FIRST Function

    +
      +
    • +

      Syntax: FIRST(field)

      +
    • +
    • +

      Description: Returns the first value of the specified field in natural document order.

      +
    • +
    • +

      Parameters:

      +
        +
      • +

        field (required): The field name to get the first value from.

        +
      • +
      +
    • +
    • +

      Return Type: Same as input field type (nullable).

      +
    • +
    • +

      Behavior:

      +
        +
      • +

        Returns the first occurrence of the field value in the result set (or the first per group when used with BY).

        +
      • +
      • +

        Uses natural document order (no sorting applied).

        +
      • +
      • +

        Returns NULL if no records exist, or if the field is NULL in the first record of the bucket.

        +
      • +
      +
    • +
    +

    LAST Function

    +
      +
    • +

      Syntax: LAST(field)

      +
    • +
    • +

      Description: Returns the last value of the specified field in natural document order.

      +
    • +
    • +

      Parameters:

      +
        +
      • +

        field (required): The field name to get the last value from.

        +
      • +
      +
    • +
    • +

      Return Type: Same as input field type (nullable).

      +
    • +
    • +

      Behavior:

      +
        +
      • +

        Returns the last occurrence of the field value in the result set (or the last per group when used with BY).

        +
      • +
      • +

        Uses reverse natural document order (equivalent to sorting by _doc DESC).

        +
      • +
      • +

        Returns NULL if no records exist, or if the field is NULL in the last record of the bucket.

        +
      • +
      +
    • +
    +
    +

    Usage Examples

    +
    -- Basic usage
    +source=logs | stats first(message), last(status) by host
    +
    
    +-- Combined with other aggregations
    +source=metrics | stats first(cpu_usage), last(memory_usage), count(), avg(response_time) by server
    +
    
    +-- Sequential processing after sorting
    +source=events | sort timestamp | stats first(event_type), last(event_data) by session_id
    + + + + + + + + + + +## Reviews + + +### @dai-chen - COMMENTED + + +A high level question: I see Calcite supports `FIRST/LAST_VALUE` as window function, can we simply register it as aggregate function? + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +## Review Comments + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FirstLastParser.java:None` + + +I think Parser should be correspondent to DSL query. Can we make this generic for top hit parser? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +just add both to `statsFunctionName`? + + +### @dai-chen on `docs/user/ppl/cmd/stats.rst:None` + + +For this table, my understanding is how the aggregate function handle NULL. If we say "return NULL", that means we return NULL if we see any NULL value? + + +### @ahkcs on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FirstLastParser.java:None` + + +Moved the logic to TopHitsParser + + +### @ahkcs on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Updated + + +### @ahkcs on `docs/user/ppl/cmd/stats.rst:None` + + +Removed + + +### @ahkcs on `docs/user/ppl/cmd/stats.rst:None` + + +For `first` and `last`, it will return NULL if no records exist or field is NULL in last record, but it seems like this info shouldn't be in this table + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/FirstAggFunction.java:None` + + +Is this always guaranteed to run in order? Not sure if we have to worry about parallelism here + + +### @dai-chen on `docs/user/ppl/cmd/stats.rst:None` + + +Could you confirm if this align with SPL or SPL will ignore null and return first non-null if any. Also please clarify this in user manual. Thanks! 
+ + +### @ahkcs on `docs/user/ppl/cmd/stats.rst:None` + + +After confirmation, it turns out that SPL will return the first non-null value for `first` and `last`, I have updated the implementation and documentation to comply with that + + +### @ahkcs on `docs/user/ppl/cmd/stats.rst:None` + + +I have added UT for testing null handling + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/FirstAggFunction.java:None` + + + Good catch! I've made both functions thread-safe by adding synchronized setValue() and volatile fields + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/FirstAggFunction.java:None` + + +`!acc.hasValue` is already done inside Accumulator, probably we can remove the check here as well as the getter? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +These looks like very similar test. Maybe only keep this one? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:327` + + +try to add UT for this? + + +### @ahkcs on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:327` + + +Added UT + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Removed `testExplainOnFirstLastDifferentFields` + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/FirstAggFunction.java:None` + + +Removed + + +### @dai-chen on `docs/user/ppl/cmd/stats.rst:None` + + +np: I see some examples here and below do both `first` and `last`? + + +### @dai-chen on `opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java:None` + + +np: If you want, probably you can reuse helper class `AggregationTestBuilder` below. Does line 794 throw exception in the case of null Project? 
+ + +### @ahkcs on `docs/user/ppl/cmd/stats.rst:None` + + +Removed `last` in this example + + +### @ahkcs on `opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java:None` + + +Thanks for the suggestion! I've refactored both analyze_firstAggregation() and analyze_lastAggregation() tests to use the AggregationTestBuilder helper class. +Regarding line 794 - It would throw a ClassCastException when the aggregate input is a LogicalTableScan instead of a Project. I've fixed this by adding proper type checking + + +## General Comments + + +### @ahkcs + + +> A high level question: I see Calcite supports `FIRST/LAST_VALUE` as window function, can we simply register it as aggregate function? + +That’s a great question! Calcite does support `FIRST_VALUE` and `LAST_VALUE` as window functions. After experimenting, though, we found that while registration and logical plan generation work fine, Calcite doesn’t provide the necessary conversion rules to run them in aggregate contexts. This leads to a `CannotPlanException: Missing conversion is LogicalAggregate[convention: NONE -> ENUMERABLE]`. + +In practice, that means we’d need to build additional Calcite infrastructure to bridge this gap—which would end up being more effort than using our current UDAF approach. So I think while it’s technically possible, sticking with UDAF is the more straightforward path for now in my opinion. + + + + +--- + +# PR #4222: Don't recreate indices on every doctest + +**URL:** https://github.com/opensearch-project/sql/pull/4222 + +**Author:** @Swiddis + +**Created:** 2025-09-03T22:51:54Z + +**State:** MERGED + +**Merged:** 2025-09-05T19:11:47Z + +**Changes:** +25 -6 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Since I've been working on build timing lately: the vast majority of our time in doctest is spent repeatedly creating and destroying test indices with `refresh=wait_for`. 
+ +Originally I just wanted to rewrite this to run all the indexing in parallel, but it turns out: our doctests don't do any index modifications. There's no real reason to be repeatedly destroying and reindexing all this data. Disabling this behavior (in addition to parallel index creation) makes doctest run in ~13 seconds locally: + +``` +/workplace/sawiddis/sql/doctest/../docs/user/dql/expressions.rst +Doctest: expressions.rst ... ok +/workplace/sawiddis/sql/doctest/../docs/user/general/comments.rst +Doctest: comments.rst ... ok +... +/workplace/sawiddis/sql/doctest/../docs/user/ppl/cmd/stats.rst +Doctest: stats.rst ... ok + +---------------------------------------------------------------------- +Ran 54 tests in 12.838s +``` + +(Also, I found a missing task dependency that was causing `./gradlew -x integTest --parallel` to fail to run doctest) + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @ykmr1224 - APPROVED + + +Thanks for the improvement! + + +## Review Comments + + +### @ps48 on `doctest/test_docs.py:302` + + +Thanks for the PR I was thinking about improving this as well. Have a question, will this lead to cascading test failures if the first test failed to load indexes? + + +### @Swiddis on `doctest/test_docs.py:302` + + +We run doctest with `--fastfail` so if the first test failed, the rest wouldn't even run. Same behavior as now, if any individual run has an index failure. 
(Here, the indexing exception would be raised via `future.result()`) + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4221: [Backport 2.19-dev] Add EARLIEST/LATEST aggregate functions for PPL (#4100) + +**URL:** https://github.com/opensearch-project/sql/pull/4221 + +**Author:** @ykmr1224 + +**Created:** 2025-09-03T21:07:10Z + +**State:** MERGED + +**Merged:** 2025-09-04T16:45:35Z + +**Changes:** +1128 -437 (26 files) + + +## Description + +backport https://github.com/opensearch-project/sql/commit/e2a1132018b260017e0c387122ef6d8523726c94 from https://github.com/opensearch-project/sql/pull/4100 + +(manual backport due to minor conflicts) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +doctest is failing due to `regex.rst` not found. +It is because https://github.com/opensearch-project/sql/pull/4218 hasn't been merged. I will rebase after it is merged. 
+ +``` +> Task :doctest:doctest FAILED +====================================================================== +ERROR: test_docs (unittest.loader._FailedTest.test_docs) +---------------------------------------------------------------------- +Traceback (most recent call last): + File "/Users/runner/work/sql/sql/doctest/test_docs.py", line 343, in load_tests + tests.append(get_test_suite(category_manager, category_name, get_doc_filepaths(docs))) + ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/runner/work/sql/sql/doctest/test_docs.py", line 355, in get_test_suite + return create_cli_suite(filepaths, parser, setup_func) + File "/Users/runner/work/sql/sql/doctest/test_docs.py", line 327, in create_cli_suite + return docsuite( + *filepaths, + parser=parser, + setUp=setup_func + ) + File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/doctest.py", line 2633, in DocFileSuite + suite.addTest(DocFileTest(path, **kw)) + ~~~~~~~~~~~^^^^^^^^^^^^ + File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/doctest.py", line 2555, in DocFileTest + doc, path = _load_testfile(path, package, module_relative, + ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + encoding or "utf-8") + ^^^^^^^^^^^^^^^^^^^^ + File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/doctest.py", line 253, in _load_testfile + file_contents = loader.get_data(filename) + File "", line 1217, in get_data +FileNotFoundError: [Errno 2] No such file or directory: '/Users/runner/work/sql/sql/doctest/../docs/user/ppl/cmd/regex.rst' + +``` + + +### @ykmr1224 + + +Rebased. 
+ + +--- + +# PR #4219: Doctest: Use 1.0 branch of CLI instead of main + +**URL:** https://github.com/opensearch-project/sql/pull/4219 + +**Author:** @Swiddis + +**Created:** 2025-09-03T20:37:33Z + +**State:** MERGED + +**Merged:** 2025-09-12T02:44:27Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `infrastructure`, `backport 2.19-dev` + + +## Description + +### Description +We're releasing SQL CLI 2.0 soon. This changes the output format of commands, which will cause disruption for the current doctest action. I've created a new `1.0-legacy` branch that contains the contents of the current `main` branch, so doctest won't be affected by these updates. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @ykmr1224 - APPROVED + + +nit: let's clarify it is about sql-cli version in the commit message (title of the PR). + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4218: [Backport 2.19-dev] Implementation of `regex` Command In PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4218 + +**Author:** @RyanL1997 + +**Created:** 2025-09-03T20:16:45Z + +**State:** MERGED + +**Merged:** 2025-09-03T22:58:56Z + +**Changes:** +944 -25 (25 files) + + +## Description + +### Description +Manual backport + +### Related Issues +* Relate https://github.com/opensearch-project/sql/pull/4083 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +Can you apply these changes? That should fix the CI failure. +https://github.com/opensearch-project/sql/pull/4214 +https://github.com/opensearch-project/sql/pull/4215 + + + +--- + +# PR #4217: `mvjoin` support in PPL Caclite + +**URL:** https://github.com/opensearch-project/sql/pull/4217 + +**Author:** @ps48 + +**Created:** 2025-09-03T20:14:50Z + +**State:** MERGED + +**Merged:** 2025-09-15T20:33:14Z + +**Changes:** +325 -1 (12 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR adds support for the mvjoin function in PPL with Apache Calcite integration. The mvjoin function concatenates the elements of a multi-value field into a single string using a specified delimiter. This is useful for converting multi-value fields into a readable string format. + +Syntax: `mvjoin(multivalue_field, delimiter)` + +Example: +``` +source=logs | eval joined_values = mvjoin(array('apple', 'banana', 'cherry'), ', ') +// Result: "apple, banana, cherry" +``` + +### Related Issues +Resolves #4146 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @Swiddis - DISMISSED + + +This feels like too much testing for something that's ultimately just an alias -- how unique is mvjoin compared to array_join really? + + +## Review Comments + + +### @dai-chen on `docs/user/ppl/functions/collection.rst:None` + + +I see the first argument is multi-value field in SPL. So we consider both string and array as mv field? If so, shall we return `h,e,l,l,o`? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java:252` + + +Do we need IT for real field with array value? + + +### @ps48 on `docs/user/ppl/functions/collection.rst:None` + + +As far as I know, SPL does work in the same way as my current implementation. Anything that comes as a single string is returned as is. So `mvjoin('hello', ',')` will come out as `hello` + + +### @ps48 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java:252` + + +yes adding this now + + + +### @ps48 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java:252` + + +updated here: https://github.com/opensearch-project/sql/pull/4217/commits/d0be08188eb02a601867b5a6d1193dad5a65c789 + + +### @dai-chen on `docs/user/ppl/functions/collection.rst:None` + + +Okay, do you know what's the use? Not sure if we need to register this function implementation or not.. + + +### @ps48 on `docs/user/ppl/functions/collection.rst:None` + + +@dai-chen yes it makes sense to remove this. I don't see a use-case either for this one. + + +## General Comments + + +### @ps48 + + +> This feels like too much testing for something that's ultimately just an alias -- how unique is mvjoin compared to array_join really? 
+ +I was just following our guidelines from https://github.com/opensearch-project/sql/blob/main/DEVELOPER_GUIDE.rst#development-guidelines 😄 There is one difference, that we can pass a single string field/value and that will be returned as is, whereas the `ARRAY_JOIN` would just work with arrays and not scalar values. + + +### @dai-chen + + +Please fix failing CI. + + +--- + +# PR #4216: Add merge_group trigger to test workflows + +**URL:** https://github.com/opensearch-project/sql/pull/4216 + +**Author:** @Swiddis + +**Created:** 2025-09-03T19:04:45Z + +**State:** MERGED + +**Merged:** 2025-09-17T23:22:56Z + +**Changes:** +2 -0 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Re: #4215, it seems prudent to actually act on #3606 now -- since automerge relies on branch protections to block on CI, but enabling branch protections globally means non-admins can never override them. If we keep relying on automerge, that means humans need to monitor PRs for approvals and passing CI by hand, which sort-of invalidates the point of automerge. Queues let us actually fire & forget PR merges, and merge many PRs at the same time without blocking. + +### Related Issues +#3606 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +So PR ready to merge will stay in the queue until manual approved? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Swiddis + + +> So PR ready to merge will stay in the queue until manual approved? 
+ +The merge queue needs to actually be enabled, see https://github.com/opensearch-project/.github/issues/381 + +But the flow is that once the PR goes in the queue, it will try to merge it with `main` and run tests, and it will reject the PR if tests fail. If the tests pass, it's actually merged. That solves a few problems for us: +- We can no longer accidentally merge PRs with failing tests, the queue would need to be manually overridden (which it usually shouldn't be) +- We can hit "merge" on PRs with failing tests if we believe `main` has a fix, since it'll test in the queue before actually merging (and cancel the merge if tests are still failing after the fix) +- We can safely bulk-merge without worrying about conflicts between PRs (any issues will be kicked out before the actual merge) + + + +--- + +# PR #4215: Fix AnalyzerTest + +**URL:** https://github.com/opensearch-project/sql/pull/4215 + +**Author:** @ykmr1224 + +**Created:** 2025-09-03T18:07:25Z + +**State:** MERGED + +**Merged:** 2025-09-03T19:08:26Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `maintenance`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +- Fix AnalyzerTest +- Follow up for https://github.com/opensearch-project/sql/pull/4214 + + + +## Reviews + + +### @dai-chen - APPROVED + + +Do we know why this and previous fix was not captured by our CI? Because our CI is running without latest changes from `main` branch? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +> Do we know why this and previous fix was not captured by our CI? Because our CI is running without latest changes from `main` branch? + +We merged manually before CI completed... + + +### @Swiddis + + +Good case study for #3606 + +Should we merge this now or wait for CI this time? + + +### @Swiddis + + +> We merged manually before CI completed... + +Actually, how? 
That PR was auto-merged, automerge should only run if the CI passes + +Maybe some other conflict (in which case, yeah, #3606) + + +### @ykmr1224 + + +> > We merged manually before CI completed... +> +> Actually, how? That PR was auto-merged, automerge should only run if the CI passes +> +> Maybe some other conflict (in which case, yeah, #3606) + +Oh, I saw it was merged by your name. +Was that auto-merge? + + +### @ykmr1224 + + +I suppose auto-merge won't wait for all the CI tasks. + + +### @dai-chen + + +> I suppose auto-merge won't wait for all the CI tasks. + +Is this true? @Swiddis + + +### @ykmr1224 + + +I see I can already merge this change even though Java CIs haven't finished. + + +### @Swiddis + + +> Oh, I saw it was merged by your name. +> Was that auto-merge? + +Yeah, automerge blames whoever enabled it :/ + + +### @ykmr1224 + + +This change is backported by https://github.com/opensearch-project/sql/pull/4218 + + +--- + +# PR #4214: Fix issue caused by merge in Analyzer.java + +**URL:** https://github.com/opensearch-project/sql/pull/4214 + +**Author:** @ykmr1224 + +**Created:** 2025-09-03T17:17:22Z + +**State:** MERGED + +**Merged:** 2025-09-03T17:45:29Z + +**Changes:** +1 -2 (1 files) + +**Labels:** `maintenance`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +- Fix issue caused by merge in Analyzer.java +- Caused by this PR: https://github.com/opensearch-project/sql/pull/4150 + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +backport after https://github.com/opensearch-project/sql/pull/4150 + + +### @ykmr1224 + + +https://github.com/opensearch-project/sql/pull/4083 have not backported to 2.19-dev, and that is causing backport failure. + + +### @ykmr1224 + + +This change is backported by https://github.com/opensearch-project/sql/pull/4218 + + +--- + +# PR #4213: Enable pushdown optimization for filtered aggregation + +**URL:** https://github.com/opensearch-project/sql/pull/4213 + +**Author:** @dai-chen + +**Created:** 2025-09-03T16:53:08Z + +**State:** MERGED + +**Merged:** 2025-09-05T21:19:41Z + +**Changes:** +492 -17 (11 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR implements pushdown optimization for aggregation with filtering conditions. This enhancement complements the recently added `count(eval)` expression support (PR #4103) and improves overall query performance for any filtered aggregation. + +#### Changes + +In addition to implementing a new `AggregateFilterAnalyzer`, this PR includes minor modifications to handle the `IS_TRUE` operator in `PredicateAnalyzer`. This change is necessary because Calcite's optimizer typically wraps predicate expressions with `IS_TRUE` to ensure proper null handling in SQL's three-valued logic (as seen in `CASE` statements generated by `count(eval)` expression below). + +#### Examples + +
    +# PPL query
    +source=accounts | stats count(eval(age = 31)) as cnt
    +
    +# Logical plan (before Calcite optimization)
    +LogicalAggregate(group=[{}], cnt=[COUNT($0)])
    +  LogicalProject($f0=[CASE(=($2, 31), 1, null:NULL)])
    +    LogicalTableScan(table=[[accounts]])
    +
    +# Logical plan (after Calcite optimization)
    +LogicalAggregate(group=[{}], cnt=[COUNT() FILTER $0])
    +  LogicalProject($f0=[IS TRUE($1 = 31)])
    +    LogicalTableScan(table=[[accounts]])
    +
    +# DSL query generated by our optimization
    +{
    +  ...
    +  "aggregations": {
    +    "cnt": {
    +      "filter": {
    +        "term": {
    +          "age": {
    +            "value": 31,
    +            "boost": 1.0
    +          }
    +        }
    +      },
    +      "aggregations": {
    +        "cnt": {
    +          "value_count": {
    +            "field": "_index"
    +          }
    +        }
    +      }
    +    }
    +  }
    +}
    +
    + +#### Todo + +Will add benchmark query and result in separate PR if needed. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3949 (partially, and will publish separate PR for `distinct_count(eval)` support). + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +For DSL query generated by our optimization, does it support script query in the `filter` as well? If it does, maybe we should add a test for that as the complex test case. + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +It does support. Sure, let me add one. Thanks! + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1394` + + +no test failed after remove query buildler? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1394` + + +yes, because I found visit(field) will return a `NamedExpression` which is not part of this `QueryExpression` abstraction. My understanding is this is not triggered unless we explicit `QueryExpression.create(field).istrue()` instead of following visitor pattern. + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Addressed in [8f8818c](https://github.com/opensearch-project/sql/pull/4213/commits/8f8818ca140475750cb1a947a240dc302b58cc78) by adding 2 more tests in UT. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4212: Add earliest/latest aggregate function for eventstats PPL command + +**URL:** https://github.com/opensearch-project/sql/pull/4212 + +**Author:** @ykmr1224 + +**Created:** 2025-09-03T16:51:49Z + +**State:** MERGED + +**Merged:** 2025-09-11T18:53:00Z + +**Changes:** +1297 -567 (32 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +- Add earliest/latest aggregate function for eventstats PPL command +- I found argument count/type validation for window functions were not working, and added verification + tests for it. + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4047 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - APPROVED + + +Hi @ykmr1224 , thanks for the change and I just left some questions. + + +### @Swiddis - APPROVED + + +lgtm with possible improvements + + +## Review Comments + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +is earliest and latest function already in `statsFunctionName`? + + +### @dai-chen on `docs/user/ppl/cmd/eventstats.rst:96` + + +Thanks for the fix! This is much clear with fewer fields in example. + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Could you clarify why we need special logic for earliest/latest? I thought it's the same as other regular window function. 
+ + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +No, it is defined separately in `earliestLatestFunction` + + +### @ykmr1224 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +It is to handle default time field, but let me see if I can improve it... + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Is this possible with ANTLR? + + +### @dai-chen on `docs/user/ppl/cmd/eventstats.rst:224` + + +Just wonder is there any way to avoid adding a sort in each example query? Why some queries below doesn't require sorting? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:582` + + +The change from `valueExpression` to `functionArgs` will allow logical expression as stats function input right? If this is intentional, could you double check if any impact or test to cover this? + + +### @RyanL1997 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:271` + + +Just being curious, Where is the `@timestamp` default handled now? The documentation still claims it defaults to `@timestamp`, but I don't see this logic in the new implementation. + + +### @RyanL1997 on `docs/user/ppl/cmd/eventstats.rst:312` + + +But looking at the test expectations: + +`earliest_message=[ARG_MIN($2, $3) OVER ()]` + +Where `$3` appears to be `@timestamp` field. How is this default determined? There's no explicit logic I found that says "if only one argument, use `@timestamp` as second argument." + + +### @ykmr1224 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:271` + + +Very good question. 
It is handled in [PPLFuncImpTable](https://github.com/opensearch-project/sql/blob/main/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java#L1060C28-L1060C44) + + +### @ykmr1224 on `docs/user/ppl/cmd/eventstats.rst:312` + + +see above comment + + +### @ykmr1224 on `docs/user/ppl/cmd/eventstats.rst:224` + + +I think the inconsistent order is coming from OpenSearch query result. We need dive deep to come up with consistent order. + +Fixed examples to have sort command. + + +### @ykmr1224 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Nice catch! Fixed. + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:582` + + +I am checking the validation, and found parameter was not validated for window functions. Working on the fix. + + +### @dai-chen on `docs/user/ppl/cmd/eventstats.rst:224` + + +Okay, just thinking can we handle this in test framework itself, otherwise all tests requires sorting? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:536` + + +Is this validation missing for all window function because we bypass Calcite SQL validator? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1213` + + +We may reuse this in any function with implicit @timestamp field? https://github.com/opensearch-project/sql/pull/4138 + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:536` + + +It is because we are directly converting window functions into other built-in function and passing only required parameters. Unneeded parameters were just ignored during that process and didn't raise error. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1213` + + +This method relies on the position of time field param, and cannot be directly reused, but we should unify the logic to refer implicit(default) timestamp field. 
+ +Added tracking issue: https://github.com/opensearch-project/sql/issues/4275 + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:582` + + +fixed. + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:408` + + +suggestion: Consider using an `AggFunctionSignature` class instead of shotgun validation + +From a safety standpoint, it would be better design to create a record type and construct it with validation. Then any methods that require the function signature can take an `AggFunctionSignature`, and it's guaranteed that the input is valid since you can't construct the object otherwise. + +Otherwise, we rely on the rest of the code to "just know" whether the input is already validated or not. + + +### @Swiddis on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:131` + + +question: Could this be an annotation instead of a method? + +Might be nicer to read, but not sure how involved creating custom annotations is. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:131` + + +That's a good point. I think we can do that. Let's me take a look and address in a separate PR if it is feasible (and reasonable). + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:408` + + +It is actually using the signature coming from function registry (this is just delegating validation). It needs to be done here since we are converting some aggregate/window function to different ones, and add/remove/change attributes. + + +## General Comments + + +### @dai-chen + + +CI is failing on doctest? + + +### @ykmr1224 + + +> CI is failing on doctest? + +Oh, seems I did wrong while merging and `category.json` was invalid... fixed now. + + +### @dai-chen + + +Hi @xinyual Could you help take another look since you worked on this before? 
+ + +### @xinyual + + +> Hi @xinyual Could you help take another look since you worked on this before? + +LGTM + + +### @ykmr1224 + + +I found argument count/type validation for window functions were not working, and added verification + tests for it. + + +### @LantaoJin + + +@yuancu , could you review this PR? + + +### @ykmr1224 + + +@yuancu merging now, but please let me know if you see any risk or concern. + + +--- + +# PR #4211: [Backport 2.19-dev] Extract getOnlyForCalciteException + +**URL:** https://github.com/opensearch-project/sql/pull/4211 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-03T16:06:30Z + +**State:** MERGED + +**Merged:** 2025-09-03T18:13:25Z + +**Changes:** +45 -69 (5 files) + + +## Description + +Backport 60a9e9e19d113bc776a594860a89d8296f449c43 from #4150. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4204: [Backport 2.19-dev] Adding big5 `coalesce` queries + +**URL:** https://github.com/opensearch-project/sql/pull/4204 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-02T23:15:56Z + +**State:** MERGED + +**Merged:** 2025-09-03T01:17:03Z + +**Changes:** +12 -1 (3 files) + + +## Description + +Backport ff3dfdc8556eb2bb3d15ba5f47fab33a9b225cb7 from #4127. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4202: [Backport 2.19-dev] Add asc keyword to sort command + +**URL:** https://github.com/opensearch-project/sql/pull/4202 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-09-02T22:20:46Z + +**State:** MERGED + +**Merged:** 2025-09-03T15:51:59Z + +**Changes:** +176 -4 (6 files) + + +## Description + +Backport 6db8d80ede11bf5c30ad61dc168e99aa464f5cf2 from #4113. 
+ + + +## Reviews + + +### @noCharger - APPROVED + + +How to handle NULL / MISSING for asc? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4195: [Backport 2.19-dev] Updating documentation for `fields` and `table` commands + +**URL:** https://github.com/opensearch-project/sql/pull/4195 + +**Author:** @aalva500-prog + +**Created:** 2025-09-02T20:14:34Z + +**State:** MERGED + +**Merged:** 2025-09-03T01:19:52Z + +**Changes:** +237 -175 (5 files) + + +## Description + +Backport [cc9c8d9](https://github.com/opensearch-project/sql/commit/cc9c8d9351bdb98042d56d92e36bd8a6058a4f3e) from #4177 + +(Manually backporting due to conflicts) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4194: Add issue template for doc issues + +**URL:** https://github.com/opensearch-project/sql/pull/4194 + +**Author:** @Swiddis + +**Created:** 2025-09-02T19:59:05Z + +**State:** MERGED + +**Merged:** 2025-09-03T21:03:24Z + +**Changes:** +20 -0 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +We have [a growing number of documentation issues](https://github.com/opensearch-project/sql/issues?q=sort%3Aupdated-desc%20is%3Aissue%20is%3Aopen%20label%3Adocumentation), these are currently under feature requests but that's not quite right, and non-maintainers can't change the labels after issue creation. This copies the documentation issue template from `documentation-website`, so external contributors can create requests for doc fixes. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4193: Split up our test actions into unit, integ, and doctest. + +**URL:** https://github.com/opensearch-project/sql/pull/4193 + +**Author:** @Swiddis + +**Created:** 2025-09-02T19:35:31Z + +**State:** MERGED + +**Merged:** 2025-09-05T21:59:08Z + +**Changes:** +2864 -2611 (18 files) + +**Labels:** `infrastructure`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Re: discussion in #4190, this PR does a few things for faster builds: + +- Break up the build actions into separate build, integ, and doctest steps + - Doctest doesn't "need" to be split anymore since it's fast now, but it's a semi-flaky suite that sees quite a few issues lately, so I think there's value in keeping it separate +- Breaks up the slowest 3 async query test suites (since they were the slowest part of the build) + - No test changes, just lots of shuffling between suites +- Enables parallel builds by default after #4222 fixed a hidden dependency for doctest + +~~The main concern with this is I think codecov needs both unit and integtest to run. (At least it's defined like that in the build files, but I'm not sure how we're actually counting the code for integ test.)~~ Nevermind, codecov doesn't run anyway. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - DISMISSED + + +It's great the CI can be finished in 30 mins. The 2.19-dev branch also needs it, but auto-backport may not work for this. + + +## Review Comments + + +### @RyanL1997 on `async-query/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperBase.java:6` + + +header missing + + +## General Comments + + +### @Swiddis + + +> The main concern with this is I think codecov needs both unit and integtest to run. + +Actually, it looks like this has been silently failing for a while. [The last codecov update was December 2024](https://app.codecov.io/gh/opensearch-project/sql). + + +### @Swiddis + + +upd: Following #4222, we can now enable parallel builds by default, turns out this is the hidden dependency that was causing issues + + +### @Swiddis + + +Build times in CI are now under 20 minutes. :) + + +--- + +# PR #4191: [Backport 2.19-dev] Add documents on how to develop a UDF / UDAF (#4094) + +**URL:** https://github.com/opensearch-project/sql/pull/4191 + +**Author:** @ykmr1224 + +**Created:** 2025-09-02T16:49:51Z + +**State:** MERGED + +**Merged:** 2025-09-03T03:29:55Z + +**Changes:** +503 -245 (13 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/1ae4f288a3d6ec59fb5234b551e5cd6293cc3c8f from https://github.com/opensearch-project/sql/pull/4094. +(Manual backport due to conflict) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +It is blocking backport https://github.com/opensearch-project/sql/pull/4100 + + +### @LantaoJin + + +@ykmr1224 thanks for backporting this. 
@yuancu , this PR not only includes documents but also code refactor, we missed it in 2.19-dev. + + +### @yuancu + + +@ykmr1224 Thanks for help backport this PR! I'm sorry for missing this backport. + + +--- + +# PR #4190: Run unit test suites in parallel + +**URL:** https://github.com/opensearch-project/sql/pull/4190 + +**Author:** @Swiddis + +**Created:** 2025-09-02T15:55:51Z + +**State:** MERGED + +**Merged:** 2025-09-03T21:50:05Z + +**Changes:** +14 -1 (12 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Our unit tests don't have any cross-dependency, so we can freely turn on parallelization for them. Makes builds a bit faster locally. (At least the parts where most of the feedback is -- async is still slow but now it runs at the end instead of the start, and will rarely fail when working on anything that isn't specifically async.) + +### Related Issues +Slow local builds. I want `./gradlew -x doctest -x integTest --parallel` to be fast. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - COMMENTED + + +Thanks. +Could u update doc also. https://github.com/opensearch-project/sql/blob/main/DEVELOPER_GUIDE.rst#building-and-running-tests + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @vamsimanohar + + +I haven't tested out but thanks for doing this. +These small things save a lot of time. The other day maven repository order killed my time. + + +### @LantaoJin + + +Does this fixing save time of integ-test for building locally with `--parallel` only, or does it reduce the time of workflow in GitHub CI either? 
+ + +### @Swiddis + + +It reduces the github CI but not very much -- the individual tasks are parallelized now (so e.g. `core:test` will be faster), but the tasks are globally still serial. It has the most effect locally where you can `-x integTest --parallel`. + +The problem with enabling parallelism between tasks right now is it seems there's an invisible dependency for `integTest`, so just `gradle build --parallel` is failing (even though `integTest` itself is serial). I've been meaning to figure out what it is. The high CPU usage also causes issues for doctest. In the meantime, I also want to make integTest itself parallel, but [haven't been able to get it to even try](https://github.com/opensearch-project/OpenSearch/issues/19186). + +Something we could do is break up our testing action into unit tests, integTest, and doctest as three different actions. Then we run `./gradlew integTest`, `./gradlew doctest`, and `./gradlew build -x integTest -x doctest --parallel`. That'd give us much faster CI overall. + + +### @Swiddis + + +> Something we could do is break up our testing action into unit tests, integTest, and doctest as three different actions. Then we run ./gradlew integTest, ./gradlew doctest, and ./gradlew build -x integTest -x doctest --parallel. That'd give us much faster CI overall. + +#4193, looks like it works. If preferred, close this in favor of that, but I'd rather get this merged quicker since it has immediate benefit for local development, and don't want to block this on any issues with those (riskier) CI updates. 
+ + +### @Swiddis + + +Test failures pending #4215, was passing before doc update + + +--- + +# PR #4187: add paths + +**URL:** https://github.com/opensearch-project/sql/pull/4187 + +**Author:** @ahkcs + +**Created:** 2025-08-30T00:21:36Z + +**State:** MERGED + +**Merged:** 2025-08-30T00:21:50Z + +**Changes:** +3 -0 (3 files) + + +## Description + +add paths + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4186: Add logs to check snapshot uploading + +**URL:** https://github.com/opensearch-project/sql/pull/4186 + +**Author:** @ahkcs + +**Created:** 2025-08-30T00:15:25Z + +**State:** MERGED + +**Merged:** 2025-08-30T00:15:39Z + +**Changes:** +56 -206 (3 files) + + +## Description + +Add logs to check snapshot uploading + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4184: test reroute strategy + +**URL:** https://github.com/opensearch-project/sql/pull/4184 + +**Author:** @ahkcs + +**Created:** 2025-08-29T23:35:19Z + +**State:** MERGED + +**Merged:** 2025-08-29T23:35:32Z + +**Changes:** +72 -7 (1 files) + + +## Description + +test reroute strategy + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4183: reroute commit-history file + +**URL:** https://github.com/opensearch-project/sql/pull/4183 + +**Author:** @ahkcs + +**Created:** 2025-08-29T23:14:55Z + +**State:** MERGED + +**Merged:** 2025-08-29T23:15:10Z + +**Changes:** +139 -10 (3 files) + + +## Description + +reroute commit-history file + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from 
humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4182: Fix CI artifact sources (#4131) + +**URL:** https://github.com/opensearch-project/sql/pull/4182 + +**Author:** @ahkcs + +**Created:** 2025-08-29T22:46:10Z + +**State:** MERGED + +**Merged:** 2025-08-29T22:46:23Z + +**Changes:** +176 -182 (2 files) + + +## Description + +Cherry pick Fix CI artifact sources (#4131) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4181: Reroute commit-history file + +**URL:** https://github.com/opensearch-project/sql/pull/4181 + +**Author:** @ahkcs + +**Created:** 2025-08-29T22:33:56Z + +**State:** MERGED + +**Merged:** 2025-08-29T22:35:07Z + +**Changes:** +18 -20 (6 files) + + +## Description + +Reroute commit-history file + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4180: Reroute commit-history file + +**URL:** https://github.com/opensearch-project/sql/pull/4180 + +**Author:** @ahkcs + +**Created:** 2025-08-29T21:59:26Z + +**State:** MERGED + +**Merged:** 2025-08-29T21:59:50Z + +**Changes:** +25 -29 (4 files) + + +## Description + +Reroute commit-history file + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4179: reroute commit-history + +**URL:** https://github.com/opensearch-project/sql/pull/4179 + +**Author:** @ahkcs + +**Created:** 2025-08-29T21:37:38Z + +**State:** MERGED + +**Merged:** 2025-08-29T21:44:44Z + +**Changes:** +140 -12 (3 files) + + +## Description + +reroute commit-history file + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments 
+ + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4178: [Backport 2.19-dev] Enable single doctest + +**URL:** https://github.com/opensearch-project/sql/pull/4178 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-29T20:49:59Z + +**State:** MERGED + +**Merged:** 2025-09-02T05:33:46Z + +**Changes:** +344 -10 (3 files) + + +## Description + +Backport d1a1519ce32fc21cf6161d5f8e1987ab07e8d988 from #4130. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4177: Updating documentation for `fields` and `table` commands + +**URL:** https://github.com/opensearch-project/sql/pull/4177 + +**Author:** @aalva500-prog + +**Created:** 2025-08-29T20:09:36Z + +**State:** MERGED + +**Merged:** 2025-09-02T19:28:32Z + +**Changes:** +232 -174 (5 files) + +**Labels:** `documentation`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Updating documentation for `fields` and `table` commands. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ykmr1224 on `docs/user/ppl/cmd/fields.rst:None` + + +This needs to be `os>` to be executed in doctest. + + +### @ykmr1224 on `docs/user/ppl/cmd/fields.rst:30` + + +I think we use `=` + + +### @ykmr1224 on `docs/user/ppl/cmd/table.rst:None` + + +Let's link to `fields` doc from here. 
+And I think we want to rather simply ask reader to check `fields` doc to avoid future inconsistency of the docs (I can easily imagine we update only `fields` doc and forget updating `table` doc) + + +### @aalva500-prog on `docs/user/ppl/cmd/fields.rst:None` + + +@ykmr1224 I think I will have to add the `fields.rst` to calcite category then for the new improvements your made in doctest? + + +### @aalva500-prog on `docs/user/ppl/cmd/table.rst:None` + + +I don't think the doctest actually test the `table` command. Should we add it? + + +### @aalva500-prog on `docs/user/ppl/cmd/fields.rst:30` + + +@ykmr1224 I'm using `-------------` here because this is as sub-header of the "Basic Examples", which is using `======`. The same thing is happening in the "Enhanced Features (Version 3.3.0)". If I use `======`, they will be at the same level. I want to highlight/separate concerns between the basic functionalities and the new enhancements for better understanding. For example: + +Basic Examples +============== + +Example 1: Select specified fields from result +---------------------------------------------- + +Example 2: Remove specified fields from result +---------------------------------------------- + +Enhanced Features (Version 3.3.0) +=========================================== + +Example 3: Space-delimited field selection +------------------------------------------- + +Example 4: Prefix wildcard pattern +----------------------------------- + + +### @ykmr1224 on `docs/user/ppl/cmd/fields.rst:30` + + +I see. We might want to fix other files, then. + + +### @ykmr1224 on `docs/user/ppl/cmd/table.rst:None` + + +I mean removing all the details from `table` doc, and ask reader to go to `fields` doc to avoid redundancy and inconsistency in the future. + + +### @aalva500-prog on `docs/user/ppl/cmd/table.rst:None` + + +Makes sense, let's confirm with Anas next week, thanks! 
+ + +### @aalva500-prog on `docs/user/ppl/cmd/fields.rst:30` + + +Maybe we can keep all of them at the same level, as you suggest. However, let's confirm with Anas next week. I think he mentioned that we should mark new features that need Calcite with 3.3+ version explicitly in the documentation. + + +### @Swiddis on `docs/user/ppl/index.rst:113` + + +Instead of a separate doc we could also just have this link into `fields`, not too important though. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4175: Fix doctest requirements install + +**URL:** https://github.com/opensearch-project/sql/pull/4175 + +**Author:** @Swiddis + +**Created:** 2025-08-29T19:09:10Z + +**State:** MERGED + +**Merged:** 2025-08-29T21:45:22Z + +**Changes:** +3 -2 (3 files) + +**Labels:** `maintenance` + + +## Description + +### Description +We already install setuptools as part of bootstrap, reinstalling a new version causes breakage with some python setups (particularly when the newer version is unavailable). On the other hand, doctest depends on click, which is unspecified. + +Fixes some setups being unable to run doctest locally (I don't know how this has been passing in CI). + +Also, async-query unit tests are independent. Enabling forking for it makes `./gradlew build --parallel -x doctest -x integTest` run 5x as fast locally. (Most of it is `IndexQuerySpecTest` and `IndexQuerySpecAlterTest`, we could split these classes into smaller chunks) + +Also, don't run spotless on generated antlr output. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4174: [Backport 2.19-dev] Add count alias to count() + +**URL:** https://github.com/opensearch-project/sql/pull/4174 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-29T18:43:19Z + +**State:** MERGED + +**Merged:** 2025-08-29T20:52:42Z + +**Changes:** +133 -12 (7 files) + + +## Description + +Backport 52476229d18b25f3ea207f29d6bf368fa580cd82 from #4157. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4172: [Backport 2.19] Fix value parsing bug. + +**URL:** https://github.com/opensearch-project/sql/pull/4172 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-29T01:11:12Z + +**State:** MERGED + +**Merged:** 2025-10-30T16:25:35Z + +**Changes:** +117 -7 (3 files) + + +## Description + +Backport fff3e3a2f2a0a7974a871efb76363e54e55bb5bc from #4095. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +@ishaoxy Can you take a look at the build failure? + + +### @Swiddis + + +Does this need to be backported even? We can just include it next release + + +### @ishaoxy + + +It is strange. I think we have backported it at #4129. + + +### @LantaoJin + + +> It is strange. I think we have backported it at #4129. + +The `backport 2.19` label added in Aug. This patch is for v2. Let me help on this resolving + + +### @LantaoJin + + +Failures in CI are unrelated now: + +- Link Checker +- doctest:bootstrap +- [startPrometheus](integ-test:startPrometheus) +``` +1: Task failed with an exception. +----------- +* What went wrong: +Execution failed for task ':doctest:bootstrap'. 
+2: Task failed with an exception. +----------- +* What went wrong: +Execution failed for task ':integ-test:startPrometheus'. +3. doctest:doctest +ModuleNotFoundError: No module named 'opensearch_sql_cli' +``` + +cc @ykmr1224 @Swiddis please kindly approve and merge. + + +--- + +# PR #4171: [Backport 2.19-dev] Support ```bin``` command with Calcite (#3878) + +**URL:** https://github.com/opensearch-project/sql/pull/4171 + +**Author:** @ahkcs + +**Created:** 2025-08-29T00:47:56Z + +**State:** MERGED + +**Merged:** 2025-08-29T19:59:23Z + +**Changes:** +5673 -6 (71 files) + + +## Description + +(cherry picked #3878 from commit 71076f79cf6f7d365408766d5de77b125e57c9b5) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4168: [AUTO] Increment version to 2.19.4-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/4168 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-29T00:10:15Z + +**State:** MERGED + +**Merged:** 2025-08-29T15:12:10Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `v2.19.4` + + +## Description + +- Incremented version to **2.19.4-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4167: [Backport 2.19-dev] PPL Coalesce Function Enhancement for Calcite - Advanced Type Handling and Field Support + +**URL:** https://github.com/opensearch-project/sql/pull/4167 + +**Author:** @aalva500-prog + +**Created:** 2025-08-28T23:21:08Z + +**State:** MERGED + +**Merged:** 2025-08-29T15:12:52Z + +**Changes:** +826 -24 (11 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/6f779b47024b3b7dc72e1148280d5b0ce6766d8f from https://github.com/opensearch-project/sql/pull/4041. 
+ +**Note for maintainers:** this is a duplicate of https://github.com/opensearch-project/sql/pull/4162. Please keep this PR and close the one created by the bot, thanks! + + + +## Reviews + + +### @LantaoJin - APPROVED + + +@aalva500-prog ~~next time, you can actually continue to work with the one created by the bot, no need to create a duplicated new PR. Anyway, approved on this.~~ +Oh, sorry the write to upstream permission may block you. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@yuancu did you see any permission issue to push commits to the upstream branch being created by bot before you became a maintainer? + + +### @yuancu + + +Yes, I could not directly push to the backport branch created by the bot. + +I assume that I would need to create another PR to merge my changes to the branch created by the bot, so I just created a new backport PR every time. + + +--- + +# PR #4166: Pushdown earliest/latest aggregate functions + +**URL:** https://github.com/opensearch-project/sql/pull/4166 + +**Author:** @ykmr1224 + +**Created:** 2025-08-28T23:03:58Z + +**State:** MERGED + +**Merged:** 2025-09-03T16:19:55Z + +**Changes:** +127 -12 (7 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +- Pushdown earliest/latest aggregate functions to OpenSearch DSL +- It actually pushes down arg_min / arg_max aggregate functions to top_hits aggregation query. +- Fixed Spark SQL to use min_by / max_by (arg_min / arg_max is not available in Spark) + +### Related Issues +- https://github.com/opensearch-project/sql/issues/3639 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @anasalkouz - COMMENTED + + +Shall we update our documentation with some use-case/examples? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +> Shall we update our documentation with some use-case/examples? + +This is performance improvement by aggregate pushdown, and don't require doc update. + + +### @ykmr1224 + + +backport is blocked by backport of https://github.com/opensearch-project/sql/pull/4100 + + +--- + +# PR #4165: [Backport 2.19-dev] Support `count(eval)` expression with stats command + +**URL:** https://github.com/opensearch-project/sql/pull/4165 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-28T21:57:56Z + +**State:** MERGED + +**Merged:** 2025-08-28T23:28:12Z + +**Changes:** +307 -13 (7 files) + +**Labels:** `enhancement`, `PPL` + + +## Description + +Backport 29c8b72148ba9a46de4d16bd4383eca25c520ffd from #4103. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4164: [Backport 2.19-dev] Add shortcut for count() + +**URL:** https://github.com/opensearch-project/sql/pull/4164 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-28T21:50:26Z + +**State:** MERGED + +**Merged:** 2025-08-28T21:51:02Z + +**Changes:** +68 -2 (6 files) + + +## Description + +Backport b3d3c5fb01304caebf91a07b2b2104e6fe191aef from #4142. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4163: `Bin` command big5 queries + +**URL:** https://github.com/opensearch-project/sql/pull/4163 + +**Author:** @ahkcs + +**Created:** 2025-08-28T21:12:22Z + +**State:** MERGED + +**Merged:** 2025-09-05T17:35:38Z + +**Changes:** +26 -0 (4 files) + +**Labels:** `testing`, `backport 2.19-dev` + + +## Description + +### Description +Adding big5 queries used for performance testing in the new features implemented for the `bin` command. + +### Related Issues +#3878 + + + +## Reviews + + +### @noCharger - COMMENTED + + +@ahkcs What queries generate the most overhead? If the implementation does not diverge, they can unite into one. + + +## Review Comments + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +QQ: Why do we need to load from separate file? Query seems simple and it should be simpler to write here. + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +I think we are adding this IT to verify if the queries we put under the big5 queries folder works, also to be consistent with merged PR: https://github.com/opensearch-project/sql/pull/4127 + + +### @Swiddis on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +^ keeping it separate helps us copy the queries as benchmark cases, we want to ideally just copy the contents of `big5/queries` and generate workloads from it. + + +## General Comments + + +### @LantaoJin + + +please update `CalcitePPLBig5IT` + + +### @ahkcs + + +> @ahkcs What queries generate the most overhead? If the implementation does not diverge, they can unite into one. + +I have removed the other test cases and kept + +1. bin_span_time.ppl - Time-based binning +2. 
bin_span_log.ppl - Logarithmic binning +3. bin_bins.ppl - Magnitude-based binning + + + + +--- + +# PR #4161: Add support for `list()` multi-value stats function + +**URL:** https://github.com/opensearch-project/sql/pull/4161 + +**Author:** @ps48 + +**Created:** 2025-08-28T19:54:36Z + +**State:** MERGED + +**Merged:** 2025-09-03T17:11:54Z + +**Changes:** +540 -0 (15 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR adds support for the `list()` aggregation function in PPL (Piped Processing Language), which collects field values into an array while preserving duplicates and order. Splitting the PR for list and values from: https://github.com/opensearch-project/sql/pull/4042 + +- Core Function Registration: Added LIST to `BuiltinFunctionName.java` and registered it in `PPLFuncImpTable.java` +- Grammar Support: Updated `OpenSearchPPLParser.g4` to recognize the list function in PPL syntax +- Documentation: Enhanced `docs/user/ppl/cmd/stats.rst` with comprehensive examples showing: +- Comprehensive Testing: Added extensive integration tests in `CalciteMultiValueStatsIT.java` covering all supported data types (boolean, byte, numeric, string, etc.) + +### Related Issues +#4026 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/stats.rst:None` + + +order is undetermined + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiValueStatsIT.java:250` + + +assert rows = 0 + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiValueStatsIT.java:235` + + +assert values + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiValueStatsIT.java:214` + + +assert values + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +revert values + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +@yuancu @qianheng-aws Please help review customized ARRAY_ARG. + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:442` + + +Add a group by test case + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Can we leverage ARRAY_SLICE? https://github.com/apache/calcite/pull/4194 + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +From the implementation, it seems it will return fewer elements than expected. + +For example, if there are 200 elements, but there are 5 nulls in the first 100 elements, this implementation will return 95 elements instead of 100 elements (first 105 elements minus 5 nulls). + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Do you mean using `ARRAY_SLICE` to cut the length to 100 after `ARRAY_AGG`? I think it's feasible but less efficient as it needs to construct a complete ARRAY first, which may be too big. 
+ + + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +I'm thinking if we should implement this aggregate function by ourself, which should get better performance. That customized function should imitate the implementation of `ARRAY_AGG` or `COLLECT` except it has additional logic to limit the length of the final output collection. + +The current approach has to perform `window` first and then `aggregation`, they are both very heavy operators. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +agree. @ps48 UDAF then make sense. + + +### @ps48 on `docs/user/ppl/cmd/stats.rst:None` + + +updated + + +### @ps48 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiValueStatsIT.java:250` + + +added the extra check + + +### @ps48 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiValueStatsIT.java:235` + + +added assertion + + +### @ps48 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiValueStatsIT.java:214` + + +added assertion + + +### @ps48 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +reverted + + +### @ps48 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:442` + + +added it https://github.com/opensearch-project/sql/pull/4161/commits/3311c893f6f83bf0115a0154f16e10d1b28222e3 + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ListAggFunction.java:24` + + +Remove "Similar to the TAKE function" + + +### @ps48 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ListAggFunction.java:24` + + +Sure will update this in a following PR. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4160: Add missing license headers from #4120 + +**URL:** https://github.com/opensearch-project/sql/pull/4160 + +**Author:** @Swiddis + +**Created:** 2025-08-28T18:43:51Z + +**State:** MERGED + +**Merged:** 2025-08-28T23:01:03Z + +**Changes:** +10 -0 (2 files) + +**Labels:** `maintenance` + + +## Description + +### Description +#4159 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Thanks, my recommendation is enabling auto copyright in IDE: settings->editor->copyright + + +--- + +# PR #4158: [Backport 2.19-dev] Starter implementation for `spath` command (#4120) + +**URL:** https://github.com/opensearch-project/sql/pull/4158 + +**Author:** @Swiddis + +**Created:** 2025-08-28T18:39:26Z + +**State:** MERGED + +**Merged:** 2025-09-16T16:58:28Z + +**Changes:** +430 -4 (14 files) + + +## Description + +Backport + license headers, will apply license headers in `main` with the followup PR. + +### Related Issues +Resolves #4119 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @ykmr1224 - COMMENTED + + +Let's fix the title prefix to `[Backport 2.19-dev]` unless it is intentional (due to length?) + +CI is failing. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4157: Add count alias to count() + +**URL:** https://github.com/opensearch-project/sql/pull/4157 + +**Author:** @selsong + +**Created:** 2025-08-28T17:24:51Z + +**State:** MERGED + +**Merged:** 2025-08-29T18:43:07Z + +**Changes:** +133 -12 (7 files) + +**Labels:** `enhancement`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Add count alias to count() +Supports count syntax without parentheses in stats PPL function. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @noCharger - DISMISSED + + +Nit: let's consider parameterized test since the only var is the query string + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +1. `countAllFunctionCall` is duplicate for both? +2. Is `c` without parentheses valid? I'm thinking can we simply change previous rule to make `LT_PRTHS RT_PRTHS` optional. + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java:100` + + +Is this test for V2 engine? + + +### @selsong on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Yes c without parenthesis is valid. 
I can change it to make LT_PRTHs and RT_PRTHs optional + + +### @selsong on `integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java:100` + + +Yes, this test is designed to work with both V2 and V1 engines. The test includes conditional schema verification using isCalciteEnabled() to handle the different return types between engines + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java:100` + + +Sure, as discussed we can skip V2 changes if not straightforward. + + +### @noCharger on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:1080` + + +why rewrite this? seems same logic + + +## General Comments + + +### @RyanL1997 + + +may need to rebase and resolve the conflicts. cc @selsong + + +--- + +# PR #4156: [Backport 2.19-dev]Add regex_match function for PPL with Calcite engine support (#4092) + +**URL:** https://github.com/opensearch-project/sql/pull/4156 + +**Author:** @vamsimanohar + +**Created:** 2025-08-28T07:55:55Z + +**State:** MERGED + +**Merged:** 2025-08-28T16:58:43Z + +**Changes:** +465 -2 (11 files) + + +## Description + +(cherry picked from commit e6c36abb5bc5aaedeed2894761ebf8fa07da6c05) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4155: [Feature][Enhancement] Enhance patterns command with additional sample_logs output field + +**URL:** https://github.com/opensearch-project/sql/pull/4155 + +**Author:** @songkant-aws + +**Created:** 2025-08-28T06:43:26Z + +**State:** MERGED + +**Merged:** 2025-09-24T07:00:43Z + +**Changes:** +119 -41 (8 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Enhance patterns command with additional `sample_logs` output field. + +`patterns` command in V2 engine returns `sample_logs` field in aggregation result. After Calcite is enabled, the `sample_logs` is not included in V3 `patterns` command aggregation result. This PR addresses this backward compatible change. + +### Related Issues +Resolves #4139 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - DISMISSED + + +LGTM + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4154: [Backport 2.19-dev] Reorder the maven repositories to resolve the service unavailable issue + +**URL:** https://github.com/opensearch-project/sql/pull/4154 + +**Author:** @LantaoJin + +**Created:** 2025-08-28T03:13:58Z + +**State:** MERGED + +**Merged:** 2025-08-28T07:08:49Z + +**Changes:** +8 -7 (3 files) + + +## Description + +Manually backport #4153 . 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4153: Reorder the maven repositories to resolve the service unavailable issue + +**URL:** https://github.com/opensearch-project/sql/pull/4153 + +**Author:** @LantaoJin + +**Created:** 2025-08-28T02:57:23Z + +**State:** MERGED + +**Merged:** 2025-08-28T07:08:01Z + +**Changes:** +6 -7 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Got multiple service unavailable CI failures after #4141, reorder the repositories to resolve the problem. + + +> Could not HEAD 'https://ci.opensearch.org/ci/dbc/snapshots/org/apache/calcite/calcite-linq4j/1.38.0/calcite-linq4j-1.38.0.pom'. Received status code 503 from server: Service Unavailable +> +> Could not HEAD 'https://ci.opensearch.org/ci/dbc/snapshots/jakarta/annotation/jakarta.annotation-api/1.3.5/jakarta.annotation-api-1.3.5.pom'. Received status code 503 from server: Service Unavailable + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4152: Search command revamp. 
+ +**URL:** https://github.com/opensearch-project/sql/pull/4152 + +**Author:** @vamsimanohar + +**Created:** 2025-08-27T22:29:24Z + +**State:** MERGED + +**Merged:** 2025-09-15T20:21:28Z + +**Changes:** +3768 -237 (70 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @vamsimanohar + + +## Description + +### Description + +**Note** : Although the PR says 70 file changes, most of them are test file changes and PPL query changes in old files. Below are the important files to review. + +``` + Parser & Grammar + + - OpenSearchPPLParser.g4 - Grammar extensions for search expression syntax + - AstBuilder.java - Constructs Search AST from parse tree + - AstExpressionBuilder.java - Builds search expression tree + + Search Expression AST Classes + + - Search.java - Enhanced Search AST node with expression support + - SearchExpression.java - Base interface for search expressions + - SearchComparison.java - Field comparison operators (=, !=, <, >, <=, >=) + - SearchIn.java - IN operator implementation + - Search{And,Or,Not}.java - Boolean operators + - SearchLiteral.java - Free text search support + + Core Integration Points + + - Analyzer.java - Integrates search expression analysis into query pipeline + - CalciteRelNodeVisitor.java - Converts Search nodes to relational operations for Calcite execution path + ``` + + + + + **Summary** + The search command has been redesigned with well-defined full text search functionality instead of being an extended where clause with source support. It now supports new search + expressions that translate directly to OpenSearch's query_string DSL for optimal performance. + + The new boundary for the search command: it is designed exclusively for full-text search, always pushed down as the first operation, and limited to Lucene query syntax support. Any additional filtering should be achieved using the where command. 
The revamped search command leverages Lucene query parser syntax (https://lucene.apache.org/core/2_9_4/queryparsersyntax.html) and only supports:
+ + + +## Reviews + + +### @yuancu - COMMENTED + + +Hi Vamsi, thank you for the work. Can you please add tests in ExplainIT for main cases? + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/search.rst:25` + + +Re-order the doc, Explain search expression, then example. + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +what does search term means? match phrase? +does search command support `search error`? use case is match doc which default field include error. + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +let's highlight example 7, this is most powerfull case. + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +what is difference of `search "Amber"` vs `search Amber`? + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +Previous section callout the following Lucene special characters are automatically escaped in values. Is it conflict. + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +search expression section already explain this, not need to re-emphasise. + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +we want to call-out limitation of each data type in this section? e.g., +* keyword field support wildcard and exactally match +* text field limitation on support wildcard search +``` +{"message": "[2025-08-26] logtype=ws:access error"} + +message="*=ws:access*" does not match. +``` + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:None` + + +Changed to `error` and added more details. + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:None` + + +My bad, we automatically escape all the special chards except for /, *, ?. + +These characters are not escaped as they are used for wildcard queries. If someone needs to search for these characters, they need to escape with backslash / . + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:None` + + +sure, also there are some edge cases with current implementation, which I am going to come with an issue. 
+ + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:None` + + +same...no difference + + +### @vamsimanohar on `docs/user/ppl/cmd/search.rst:None` + + +@penghuo updated the text. +Will update further for date types and any missing types after testing. + + +### @penghuo on `docs/user/ppl/cmd/search.rst:None` + + +1. add test case without double quote, `search Amber source=accounts` +2. explain difference of token w/o double quote in search expression section. + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +When should use single quote and backtic? e.g. + +### +POST {{baseUrl}}/log00001/_bulk +Content-Type: application/x-ndjson + +{ "index": { "_id": 1 } } +{"message": "(1+1)=2"} +{ "index": { "_id": 2 } } +{"message": "The Right Way"} + + +`"search source=log00001 '(0+1)=2'"` should not match? + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +nit, simply for-loop as +``` +combined = searchExprs.stream() + .map(SearchGroup::new) + .reduce((acc, expr) -> new SearchAnd(acc, expr)) + .orElse(null); +``` + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +searchExprs.size() could be 0, then combined is null? + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:768` + + +Is better to create SearchExpressionAnalyzer instead? + +SearchExpression is only used by search command, and SearchExpression is not composite with other expression. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java:28` + + +https://github.com/opensearch-project/sql/pull/3834/files, does this PR already cover query_string push down? + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +what is use case of query_string search table name? 
+ + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +"search source=log00001 \"a'b\"" + +``` +{ "index": { "_id": 3 } } +{"message": "a'b"} +{ "index": { "_id": 4 } } +{"message": "'a'b"} +``` + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java:28` + + +This is used in case pushdown not enabled scenario. + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +This is to support as much as possible in unquoted strings. + + +In unquoted Strings, I am only supporting now DQUOTA_STRING and ID only which covers only alphanumeric. Anything else should be inside double quites. + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java:28` + + +In case if customer disables pushdown setting? search still would be pushed down. Without this there is a lot of refactoring to existing ITs where simple queries like source=index age=10 are failing in NoCalcitePushDown + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +``` +if (ctx.searchExpression().isEmpty()) { + return visitFromClause(ctx.fromClause()); +} +``` + +This removes the case where searchExprs.size() = 0 case...but I can be futher defensive and add this condition at relevant place. + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:768` + + +You mean SearchExpressionBuilder? + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java:28` + + +Created an issue to remove the above rule when the setting is removed in future. 
+ +Meta Issue: https://github.com/opensearch-project/sql/issues/4260 + +Actual Issue: https://github.com/opensearch-project/sql/issues/4262 + + +### @yuancu on `docs/user/ppl/cmd/search.rst:None` + + +*OpenSearch query_string queries* or *PPL query string function* may more accurate? Please make a link to the query string doc page to make it easier for users who did not know query string: https://docs.opensearch.org/latest/query-dsl/full-text/query-string/. (Alternatively, a link to PPL query string function also works) + + +### @yuancu on `integ-test/src/test/resources/big5/queries/asc_sort_timestamp_can_match_shortcut.ppl:1` + + +Can you confirm that this is not a breaking change? i.e. there is no customer use case for match in search queries. Other discontinued logical expression syntax includes `search ... exists [subsearch]`, `search ... in [subsearch]`, etc. + +cc @LantaoJin + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java:48` + + +Should the field be validated somewhere? It seem it feeds whatever user feeds into the query string. A wrong field name will never get alter. + + +### @yuancu on `docs/user/ppl/cmd/search.rst:None` + + +Can you make it explicit that multiple unquoted literals result in an AND relationship? e.g. `search a b` is equivalent to `search a AND b`. + +This came to me because the default behavior of string query seem to be OR. E.g. 
+```json +GET time_test/_search +{ + "query": { + "query_string": { + "query": "value:8945 7623" + } + }, + "_source": { + "includes": [ + "value" + ] + } +} +``` + +Results: +```json +{ + "hits": [ + { + "_index": "time_test", + "_id": "1", + "_score": 1, + "_source": { + "value": 8945 + } + }, + { + "_index": "time_test", + "_id": "2", + "_score": 1, + "_source": { + "value": 7623 + } + } + ] +} + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4151: [Backport 2.19-dev] PPL percentile function shortcut `perc()` and `p()` support + +**URL:** https://github.com/opensearch-project/sql/pull/4151 + +**Author:** @aalva500-prog + +**Created:** 2025-08-27T21:45:04Z + +**State:** MERGED + +**Merged:** 2025-08-28T21:49:23Z + +**Changes:** +366 -33 (9 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/aecca57878d9ddee5036c31dc9288cff7e818598 from https://github.com/opensearch-project/sql/pull/4085. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Is it duplicated to #4133 ? + + +--- + +# PR #4150: Extract getOnlyForCalciteException + +**URL:** https://github.com/opensearch-project/sql/pull/4150 + +**Author:** @ykmr1224 + +**Created:** 2025-08-27T20:44:09Z + +**State:** MERGED + +**Merged:** 2025-09-03T16:06:14Z + +**Changes:** +45 -69 (5 files) + +**Labels:** `maintenance`, `backport 2.19-dev` + + +## Description + +### Description +- Refactor to extract getOnlyForCalciteException + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ykmr1224 + + +will backport to 2.19-dev to avoid conflicts. + + +--- + +# PR #4142: Add shortcut for count() + +**URL:** https://github.com/opensearch-project/sql/pull/4142 + +**Author:** @noCharger + +**Created:** 2025-08-27T07:54:28Z + +**State:** MERGED + +**Merged:** 2025-08-28T00:49:41Z + +**Changes:** +68 -2 (6 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Add shortcut for count() + +### Related Issues +N/A + +### Check List +~~- [ ] New functionality has javadoc added.~~ N/A +~~- [ ] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/DEVELOPER_GUIDE.rst#new-ppl-command-checklist) all confirmed.~~ N/A +~~- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md).~~ N/A +~~- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose).~~ N/A + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +Do we need to worry about other cases where people expect `count` as a string literal representing count? e.g. 
I reviewed [this condition match](https://github.com/opensearch-project/sql/pull/3993/files/8da537f2c713c06e52c6afa0e97264cd831fac99#diff-518ddbde8cc139f2be35ffaeccd6dca4468147cc06f24f3458792e05584a61daR1350) yesterday, but also stuff like [SelectResultSet](https://github.com/opensearch-project/sql/blob/b452f07c2706939c74620eedbdfc4893c6a9afff/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java#L345-L346) or [Flint's aggregate function map](https://github.com/opensearch-project/sql/blob/b452f07c2706939c74620eedbdfc4893c6a9afff/async-query-core/src/main/java/org/opensearch/sql/spark/validator/FunctionType.java#L61) + + + +### @RyanL1997 - APPROVED + + +In generally lgtm. + + +## Review Comments + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java:34` + + +nit: Let's separate method for a different test case. Same for other tests. + + +### @ykmr1224 on `docs/user/dql/aggregations.rst:None` + + +Looks like this doc is for SQL. Should we fix PPL doc? + + +### @ykmr1224 on `docs/user/dql/aggregations.rst:None` + + +Wondering if we need to handle `c(1)` or `c(*)` as well. + + +### @RyanL1997 on `docs/user/dql/aggregations.rst:None` + + +nit: Consider adding a usage example for `C()` to make it clearer for users + + +### @noCharger on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java:34` + + +> Let's separate method for a different test case. + +Having separate test methods for count() and c() doesn't make much sense since both functions produce identical logical plans, results, and the only difference is the parser recognizing c() as an alias for count(). This creates unnecessary code duplication and maintenance overhead + + + + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java:34` + + +If only one thing is tested in a test method, it would be very easy to identify what is going wrong. 
You might want to refer to this post:
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4140: [Backport 2.19-dev] Migrate from sonatype snapshot to ci.opensearch.org snapshots (#4141) + +**URL:** https://github.com/opensearch-project/sql/pull/4140 + +**Author:** @LantaoJin + +**Created:** 2025-08-27T06:32:42Z + +**State:** MERGED + +**Merged:** 2025-08-27T17:56:17Z + +**Changes:** +3 -3 (1 files) + + +## Description + +(cherry picked from #4141 commit 80945963179d4dc24e77165ec928d64a4482d524) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4138: Support refering to implicit `@timestamp` field in span + +**URL:** https://github.com/opensearch-project/sql/pull/4138 + +**Author:** @yuancu + +**Created:** 2025-08-27T05:19:54Z + +**State:** MERGED + +**Merged:** 2025-10-21T02:50:05Z + +**Changes:** +384 -41 (9 files) + +**Labels:** `bug`, `enhancement`, `backport 2.19-dev` + +**Assignees:** @yuancu + + +## Description + +### Description + +This PR allows makes the query `source=my-index | stats count() by span(1h)` equivalent to `source=my-index | stats count() by span(@timestamp, 1h)` + +### Related Issues +Resolves #4136, resolves #4527 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @LantaoJin - DISMISSED + + +@yuancu can you fix the conflicts + + +## Review Comments + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Will it be better to set `field` in span to be `new Field(new QualifiedName(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP))` if field is null? Then we don't need any change here. + +And I'm wondering if v2 works well since we only make change for calcite visitor. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +I referred it here because I thought the AST level should be consistent with users' input. E.g. `BY span(1h)` should be parsed to `Span(value=1, unit=hour, field=null)`. Subsequent implicit reference can be added later because this semantic of how absent field should be interpreted is in a higher level than syntax tree construction. + +But it's also not bizarre to refer it while building AST tree. I'll modify the implementation accordingly. + + +### @LantaoJin on `ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java:None` + + +no test for verification? + + +### @yuancu on `ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java:None` + + +Reverted this change to `Span` as span will always come with a field with latest implementation -- I inject `@timestamp` to spans without a field specified in the AST layer now. + + +## General Comments + + +### @yuancu + + +> @yuancu can you fix the conflicts + +Fixed. Thanks for reminding! + + +### @RyanL1997 + + +> This PR is stalled because it has been open for 2 weeks with no activity. + +Hi @yuancu , is this implementation still in track? 
+ + +--- + +# PR #4134: Update fields.rst + +**URL:** https://github.com/opensearch-project/sql/pull/4134 + +**Author:** @aalva500-prog + +**Created:** 2025-08-26T19:52:34Z + +**State:** MERGED + +**Merged:** 2025-08-26T23:39:57Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `PPL`, `maintenance`, `calcite` + + +## Description + +### Description +Update broken link in `fields.rst` documentation file + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4132: [Backport 2.19-dev] Support for == and like operator in where and eval clauses. + +**URL:** https://github.com/opensearch-project/sql/pull/4132 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-26T17:37:31Z + +**State:** MERGED + +**Merged:** 2025-09-03T20:57:49Z + +**Changes:** +341 -2 (7 files) + + +## Description + +Backport 8d929e454d53e7524484c35b161c953ca783484b from #4101. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4131: [Hotpatch][Infra] Fix CI artifact sources + +**URL:** https://github.com/opensearch-project/sql/pull/4131 + +**Author:** @Swiddis + +**Created:** 2025-08-26T17:32:25Z + +**State:** MERGED + +**Merged:** 2025-08-27T02:36:09Z + +**Changes:** +1 -4 (2 files) + +**Labels:** `infrastructure`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Old sonatype repo was finally deactivated, turns out we implicitly still depended on some artifacts from it because we had fallback repositories configured, and many artifacts from the old repo were missing in the new one. + +Removes the (now fully deprecated) fallback, and updates our base version in `main` to the latest version available in the newer snapshot repos. + +### Related Issues +All CI broke from this + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Swiddis + + +Not us, artifact server is supposed to stay up for read access, it's currently unintentionally down. + + +### @Swiddis + + +Old server's dead now, let's just hotpatch onto BWC w/ 3.1 and figure out how to rearchitect later + + +### @LantaoJin + + +Should be related to https://github.com/opensearch-project/opensearch-build/issues/5360? 
+ + +--- + +# PR #4130: Enable single doctest + +**URL:** https://github.com/opensearch-project/sql/pull/4130 + +**Author:** @ykmr1224 + +**Created:** 2025-08-26T03:21:33Z + +**State:** MERGED + +**Merged:** 2025-08-26T18:31:16Z + +**Changes:** +344 -10 (3 files) + +**Labels:** `maintenance`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +- Allow executing single doctest for easier debugging. + +Examples: +``` + # Test all docs + ./gradlew :doctest:doctest + # Test single file using main doctest task + ./gradlew :doctest:doctest -Pdocs=search + + # Test multiple files at once + ./gradlew :doctest:doctest -Pdocs=search,fields,basics + + # With verbose output + ./gradlew :doctest:doctest -Pdocs=stats -Pverbose=true +``` + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `doctest/test_docs.py:502` + + +nit: I would extract the body of this loop to a separate method. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4129: [Backport 2.19-dev] Fix value parsing bug. + +**URL:** https://github.com/opensearch-project/sql/pull/4129 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-26T02:18:52Z + +**State:** MERGED + +**Merged:** 2025-08-26T09:38:25Z + +**Changes:** +106 -7 (2 files) + + +## Description + +Backport fff3e3a2f2a0a7974a871efb76363e54e55bb5bc from #4095. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4128: Update developer guide + +**URL:** https://github.com/opensearch-project/sql/pull/4128 + +**Author:** @ykmr1224 + +**Created:** 2025-08-25T22:17:29Z + +**State:** MERGED + +**Merged:** 2025-08-26T17:33:32Z + +**Changes:** +14 -14 (1 files) + +**Labels:** `documentation`, `maintenance` + + +## Description + +### Description +- Update developer guide doc regarding JDK version +- Minor fixed such as normalizing quotes/spaces + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4127: Adding big5 `coalesce` queries + +**URL:** https://github.com/opensearch-project/sql/pull/4127 + +**Author:** @aalva500-prog + +**Created:** 2025-08-25T22:14:37Z + +**State:** MERGED + +**Merged:** 2025-09-02T23:15:44Z + +**Changes:** +12 -1 (3 files) + +**Labels:** `maintenance`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Adding big5 queries for the new `coalesce` function enhancements. + +### Related Issues +https://github.com/opensearch-project/sql/issues/4005 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @noCharger - COMMENTED + + +Can we add field with null value to just demonstrate functionality? + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/big5/queries/coalesce_empty_string_priority.ppl:None` + + +What's purpose to add the ppl files? Please update the `PPLBig5IT`. + + +### @aalva500-prog on `integ-test/src/test/resources/big5/queries/coalesce_empty_string_priority.ppl:None` + + +The purpose of adding the ppl files is to have the source query for new features/enhancements in commands/functions that include benchmark. We are testing performance in big5 data set whenever we improve an existing command/function or create a new one. Hope that makes sense. + + +### @aalva500-prog on `integ-test/src/test/resources/big5/queries/coalesce_empty_string_priority.ppl:None` + + +Hi @LantaoJin, I tried to add the queries to the `PPLBig5IT` test suite as recommended. During the integration test execution, I have identified two issues that require clarification: + +**Issue 1: Calcite Configuration in PPLBig5IT** +The `PPLBig5IT` class disables Calcite by default in its initialization method. If the new command or enhancements are designed to function exclusively with Calcite enabled, the tests will fail in this configuration. + +```java +@Override +public void init() throws Exception { + super.init(); + loadIndex(Index.BIG5); + disableCalcite(); +} +``` + +**Issue 2: Missing Field in CalcitePPLBig5IT** +When executing tests through CalcitePPLBig5IT, I encounter a field resolution error for `host.name`. The error indicates this field is not present in the current dataset, though it appears to be part of the Big5 dataset mappings used in our EC2 performance testing environment. 
+ +```java +org.opensearch.client.ResponseException: method [POST], host [http://[::1]:61814], URI [/_plugins/_ppl], status line [HTTP/1.1 400 Bad Request] +{ + "error": { + "reason": "Invalid Query", + "details": "field [host.name] not found; input fields are: [agent, agent.ephemeral_id, agent.id, agent.name, agent.type, agent.version, process, process.name, log, log.file, log.file.path, message, tags, cloud, cloud.region, input, input.type, @timestamp, ecs, ecs.version, data_stream, data_stream.dataset, data_stream.namespace, data_stream.type, meta, meta.file, host, metrics, metrics.size, metrics.tmin, aws, aws.cloudwatch, aws.cloudwatch.ingestion_time, aws.cloudwatch.log_group, aws.cloudwatch.log_stream, event, event.dataset, event.id, event.ingested, _id, _index, _score, _maxscore, _sort, _routing]", + "type": "IllegalArgumentException" + }, + "status": 400 +} +``` + +Could you please confirm whether the `host.name` field should be included in the `Big5` dataset used for integration testing in this project? This will help determine if we need to update the dataset or modify the test queries accordingly. + +Thank you for your guidance on resolving these issues. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java:19` + + +It can be a separate task, but we might want to scan the files under `big5/queries` and automatically execute all. + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java:19` + + +Yeah, that's a great idea. + + +### @noCharger on `integ-test/src/test/resources/big5/queries/coalesce_empty_string_priority.ppl:None` + + +This appears to be an edge case query. Any implementation nuances you'd like to benchmark? + + +### @noCharger on `integ-test/src/test/resources/big5/queries/coalesce_numeric_fallback.ppl:None` + + +Does field type matter for `coalesce`? 
+ + +### @aalva500-prog on `integ-test/src/test/resources/big5/queries/coalesce_numeric_fallback.ppl:None` + + +@noCharger No, we have removed strict same-type requirement for all arguments. The new implementation of the `coalesce` function performs automatic type coercion for mixed data types. + + +### @aalva500-prog on `integ-test/src/test/resources/big5/queries/coalesce_empty_string_priority.ppl:None` + + +@noCharger Yes, you are right, this is an edge case. I just wanted to test the new functionality with `big5` data set, as the new `coalesce` function supports empty strings ("") and whitespace strings (" ") as valid values. For example, empty strings are treated as valid non-null values, not skipped. So, this query will return an empty string always. Personally, I believe this functionality will be barely used, so maybe I can remove this query. + + +## General Comments + + +### @aalva500-prog + + +@noCharger To correctly demonstrate `null` values handling, we need to verify the data in the whole `big5` data set to see if there are fields with a `null` value assigned. However, with the new implementation, we can demonstrate `null` values handling with `non-existing` fields also, as `non-existing` fields are treated as `null`. + +For example, the following query will always skip `dummy_field` field. Then, it will continue to the `host.name` field and check if it contains a `null` value. If it does contain a `null` value, it will then check if `metrics.size` is `non-null`. Finally, if both `host.name` and `metrics.size` are null, it will fallback to the `'unknown'` string literal. + +``` +source = big5 +| eval result = coalesce(dummy_field, `host.name`, `metrics.size`, 'unknown') +| head 10 +``` + +Here we are demonstrating mixed data type handling in the same `coalesce` function, along with `non-existing` fields treated as `null` and string literal fallback. I think for the purpose of this exercise, we can leave this query only. 
Please let me know what you think, thanks! + + +### @noCharger + + +> @noCharger To correctly demonstrate `null` values handling, we need to verify the data in the whole `big5` data set to see if there are fields with a `null` value assigned. However, with the new implementation, we can demonstrate `null` values handling with `non-existing` fields also, as `non-existing` fields are treated as `null`. +> +> For example, the following query will always skip `dummy_field` field. Then, it will continue to the `host.name` field and check if it contains a `null` value. If it does contain a `null` value, it will then check if `metrics.size` is `non-null`. Finally, if both `host.name` and `metrics.size` are null, it will fallback to the `'unknown'` string literal. +> +> ``` +> source = big5 +> | eval result = coalesce(dummy_field, `host.name`, `metrics.size`, 'unknown') +> | head 10 +> ``` +> +> Here we are demonstrating mixed data type handling in the same `coalesce` function, along with `non-existing` fields treated as `null` and string literal fallback. I think for the purpose of this exercise, we can leave this query only. Please let me know what you think, thanks! + +Sounds good, do we still need all cases or just one query demonstrate all? + + +### @aalva500-prog + + +@noCharger IMO, one query should be enough. I'll update the PR and let you know. + + +--- + +# PR #4126: `fields` and `table` command big5 queries + +**URL:** https://github.com/opensearch-project/sql/pull/4126 + +**Author:** @aalva500-prog + +**Created:** 2025-08-25T21:38:38Z + +**State:** MERGED + +**Merged:** 2025-08-26T19:55:17Z + +**Changes:** +12 -0 (4 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Adding big5 queries used for performance testing in the new features implemented for the `fields` and `table` commands. 
+ +### Related Issues +https://github.com/opensearch-project/sql/issues/3888 and https://github.com/opensearch-project/sql/issues/3877 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @noCharger - COMMENTED + + +I think we just need 1 or 2 queries, which would add most of the overhead among all of them + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @aalva500-prog + + +@noCharger The full wildcard query causes the most overhead as it must expand to all fields in the big5 dataset and every field name must be processed. Kindly note that `table` is an alias of the `fields` command, so they are basically the same thing. + +``` +- source = big5 | fields * | head 10 +- source = big5 | table * | head 10 +``` + +Then, the second highest overhead is caused by the mixed explicit and wildcard patterns query, as multiple wildcard pattern matching operations are required: + +``` +- source = big5 | fields @timestamp, metrics* *name | head 10 +- source = big5 | table @timestamp, metrics* *name | head 10 +``` + +Please let me know which ones we should keep, thanks! + + +### @noCharger + + +> @noCharger The full wildcard query causes the most overhead as it must expand to all fields in the big5 dataset and every field name must be processed. Kindly note that `table` is an alias of the `fields` command, so they are basically the same thing. 
+> +> ``` +> - source = big5 | fields * | head 10 +> - source = big5 | table * | head 10 +> ``` +> +> Then, the second highest overhead is caused by the mixed explicit and wildcard patterns query, as multiple wildcard pattern matching operations are required: +> +> ``` +> - source = big5 | fields @timestamp, metrics* *name | head 10 +> - source = big5 | table @timestamp, metrics* *name | head 10 +> ``` +> +> Please let me know which ones we should keep, thanks! + +Nice, let's have all four + + +--- + +# PR #4123: Implement `Append` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4123 + +**Author:** @songkant-aws + +**Created:** 2025-08-25T05:25:52Z + +**State:** MERGED + +**Merged:** 2025-09-05T08:15:46Z + +**Changes:** +942 -2 (20 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Implement `Append` command in V3 Calcite engine. This `Append` command is quite similar to standard SQL union all operator. + +### Related Issues +Resolves #4078 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +Can you add a test in ExplainIT and CrossClusterSearchIT? + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Are we going to support searching another index in the subsearch, instead of the same index as the main query? In the current implementation, this seems to be unsupported. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1222` + + +Let's organize it better by extracting methods. 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1222` + + +Can we share logic with `appendcol`? + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1222` + + +No, I think they're different logic. Can't share it with `appendcol` + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Added that support in latest commit, though we haven't reached the agreement in RFC. + +I'm inclined to support this syntax anyway. + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +It's a breaking change. cc @penghuo @brijos + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/tree/Append.java:28` + + +what's the different between `searchPlan` and `child`? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1222` + + +> Let's organize it better by extracting methods. + +Extracting methods and Util classes makes the logic more hard to understand, unless the logic could be reused. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:None` + + +this `PlanUtils` is for Calcite RelNode actually. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +can you explain what is `empty values`? I didn't see any IT for this. +IMO, the searchPlan is child of the `node` and you have already visited its children in step 1. 
+ + +### @LantaoJin on `docs/user/ppl/cmd/append.rst:None` + + +remove + + +### @LantaoJin on `docs/user/ppl/cmd/append.rst:None` + + +3.3.0 + + +### @LantaoJin on `docs/user/ppl/cmd/append.rst:None` + + +I think we can remove this since we plan to enable calcite in 3.3.0 + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +seems they are all missing in `commandName` + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendTest.java:None` + + +Supporting `append [ ]` is on purpose? + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +Maybe you could separate to two PRs, since one of it should be marked to `breaking`. + + +### @songkant-aws on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +That's fair. I can make `appendcols` another PR. + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/ast/tree/Append.java:28` + + +The `child` is the main query. However, the `searchPlan` is the first optional searchCommand node starting in subsearch. + +This is for matching optional `searchCommand` in the head of subsearch for `append` or `appendcols` command. + +For example, in future, this query is a valid query: `search | append [ | inputlookup myexcel | fields myfield ]` +In above query case, the searchCommand between square brackets is optional because `inputlookup` command is a command similar to TableFunction. Users have freedom to choose whether data comes from search or local input. The searchPlan is used for parsing empty searchCommand as a 0 row * 0 col `LogicalValues[[]]` RelNode. + +Another reason is to use searchPlan to handle an edge case of appending empty subresults. Some other pipeline language has similar functionality to allow subsearch outputs empty result. For example, `search | append [ ]` and `search | append [ | fields a, b, c ]` are the same because subsearch start with 0 row * 0 col input. The syntax is legit to append empty result. 
It is equivalent to main query. + +Calcite is a strong schema engine, parsing a RelNode like +``` +Project(a = [$0], b = [$1], ..) + LogicalValues[[]] +``` +will throw exception when either building RelNode or at runtime. Because Project cannot find any 'a' column from the input. + +I'm open to this discussion, we have two options here: +1. Since we're using strong schema engine, we should just throw exception. This option is simple but may have inconsistent behavior with other pipeline language. +2. We add logics to use empty values like `LogicalValues[[]]` to handle edge cases. A better way is probably to determine whether subsearch RelNode's leaf is empty values. But still it has the problem of building RelNode successfully. + + + + + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:None` + + +It's used in handling edge case of determining RelNode in subsearch as described in https://github.com/opensearch-project/sql/pull/4123#discussion_r2308997609. But I'm thinking it can be moved to AstBuilder as well. + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +See my explanation in: https://github.com/opensearch-project/sql/pull/4123#discussion_r2308997609 + + +### @songkant-aws on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendTest.java:None` + + +Yes. It's on purpose to test the edge case of appending 0 row * 0 col subresult. 
See my explanation in: https://github.com/opensearch-project/sql/pull/4123#discussion_r2308997609 + + +### @songkant-aws on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +done + + +### @songkant-aws on `docs/user/ppl/cmd/append.rst:None` + + +Removed + + +### @songkant-aws on `docs/user/ppl/cmd/append.rst:None` + + +Done + + +### @songkant-aws on `docs/user/ppl/cmd/append.rst:None` + + +Removed + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/ast/tree/Append.java:28` + + +I refactor the code a bit to avoid confusing name of `searchPlan` + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/ast/tree/Append.java:28` + + +I choose to throw exception for empty subsearch input for now because it needs some dirty work to preprocess ast tree to achieve a not useful use case. Not supporting it by throwing exception is elegant. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/tree/Append.java:28` + + +> I choose to throw exception for empty subsearch input for now because it needs some dirty work to preprocess ast tree to achieve a not useful use case. Not supporting it by throwing exception is elegant. + +@songkant-aws I think the the previous implementation looks good to me except the confusing name of `searchPlan`, maybe we need it back with another name such as `emptySource` + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendTest.java:None` + + +Get it, please support the corner case `append [ ]` and `append [ | fields a,b,c ]` + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/ast/tree/Append.java:28` + + +I changed back the empty source support, and for nested case. + + +## General Comments + + +### @LantaoJin + + +@songkant-aws please fix the conflicts. @qianheng-aws @yuancu do one of you have a chance to take another review after conflicts resolved? 
+ + +--- + +# PR #4122: [Backport 2.19-dev] Support distinct_count/dc in eventstats + +**URL:** https://github.com/opensearch-project/sql/pull/4122 + +**Author:** @ahkcs + +**Created:** 2025-08-24T23:41:14Z + +**State:** MERGED + +**Merged:** 2025-09-03T20:51:47Z + +**Changes:** +166 -24 (7 files) + + +## Description + +### Description +Support distinct_count/dc in eventstats #4084 + + + +## Reviews + + +### @LantaoJin - CHANGES_REQUESTED + + +In addition to Java-version-specific fixes in backporting, do not simply modify original tests during backporting. At minimum, provide an explanation for why you're bypassing the issue rather than resolving it fundamentally. + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +why this IT was deleted? do we miss any backporting? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java:601` + + +why `fields` required in these ITs. + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java:601` + + +It will cause the CI to fail, local testing was successful without `fields` + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +This IT was deleted because it was not originally in 2.19-dev branch. And this IT was not created by #4084 so I dropped it in this backport + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Oh, I see. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java:601` + + +> It will cause the CI to fail, local testing was successful without `fields` + +If the original tests were successful without this PR, sounds a potential bug is introducing in this PR. 
+ + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java:601` + + +The CI failure is this PR was caused by the order of the schema, and this failure also is not able to be reproduced locally. I suspect that it's triggered by different testing environment in CI and local. Do you think we should also add this `fields` to main for consistency in this case? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java:601` + + +> and this failure also is not able to be reproduced locally. I suspect that it's triggered by different testing environment in CI and local. + +The question is how the PR introduced a reproduced CI failure. we need to figure out the reason. + + +## General Comments + + +### @ahkcs + + +> In addition to Java-version-specific fixes in backporting, do not simply modify original tests during backporting. At minimum, provide an explanation for why you're bypassing the issue rather than resolving it fundamentally. + +The CI failure was reproduced locally by switching to Java 11. Since different Java version will cause the order of the schema to be different, should we add the `fields` on both main and 2.19-dev backport to ensure output consistency for different Java versions? + + +### @LantaoJin + + +> > In addition to Java-version-specific fixes in backporting, do not simply modify original tests during backporting. At minimum, provide an explanation for why you're bypassing the issue rather than resolving it fundamentally. +> +> The CI failure was reproduced locally by switching to Java 11. Since different Java version will cause the order of the schema to be different, should we add the `fields` on both main and 2.19-dev backport to ensure output consistency for different Java versions? + +Do you mean when move to Java 11, even without your PR, the CI will fail either. If yes, we can add `fields`. 
But if this failures only happened with your PR, we still need to investigate why the PR changed something. + + +### @ahkcs + + +> Do you mean when move to Java 11, even without your PR, the CI will fail either. If yes, we can add `fields`. But if this failures only happened with your PR, we still need to investigate why the PR changed something. + +The CI failure is specifically for my `distinct_count` test cases, no other tests failed + +For testing in main, the project requires Java 21 minimum. In 2.19-dev branch, I have confirmed that CI failure can be reproduced locally with Java 11. When using Java 21, the integration test will pass locally. + +By `local testing` I mean the local branch for this PR + + +### @ykmr1224 + + +@ahkcs @LantaoJin +I think we can put `@Ignore` to the failing test cases for this specific PR, but we should fix it to return result with consistent order regardless of the environment. + + +### @LantaoJin + + +FYI. This PR is intended to resolve the issue of result ordering inconsistencies across different Java versions. https://github.com/opensearch-project/sql/pull/3740 + + +--- + +# PR #4120: Starter implementation for `spath` command + +**URL:** https://github.com/opensearch-project/sql/pull/4120 + +**Author:** @Swiddis + +**Created:** 2025-08-24T04:42:00Z + +**State:** MERGED + +**Merged:** 2025-08-28T17:56:00Z + +**Changes:** +422 -1 (14 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +For now, implementing `spath` (#4119) as a lightweight conversion directly into a `json_extract` eval. We'll add more specific semantics later, but this covers the most common usage for the command as-is. 
+ +For more path behavior, see: https://github.com/opensearch-project/sql/blob/b220be43b082929101ed0905e08eee44fe07c6c3/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java#L107 + +### Related Issues +Related to #4119 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - COMMENTED + + +Notes for reviewers + + +### @RyanL1997 - APPROVED + + +Hi @Swiddis , thanks for taking this on. Generally lgtm, just need to fix the license header for some classes. + + +### @RyanL1997 - APPROVED + + +Hi @Swiddis , thanks for taking this on. Generally lgtm, just need to fix the license header for some classes. + + +### @dai-chen - APPROVED + + +Please fix all license header as Ryan commented. + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/ast/tree/SPath.java:None` + + +This function's the core bit, rest is plumbing + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractFunctionImpl.java:None` + + +Usability patch. Without this, `JSON_EXTRACT` raises exceptions on any missing fields (which kills extracting from flexibly-typed inputs, one of the major reasons to rely on string types). + +The user-facing error without this on a missing value is `java.lang.NullPointerException: Cannot invoke "Object.toString()" because "candidate" is null`. + + +### @Swiddis on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +todo: Going to rewrite this to allow arguments in arbitrary order. 
+ + + +### @Swiddis on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:537` + + +If an argument appears multiple times, we overwrite it with the new value. + +This is consistent with how the other commands behave under the same scenario. + + + +### @Swiddis on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:537` + + +If an argument appears multiple times, we overwrite it with the new value. + +This is consistent with how the other commands behave under the same scenario. + + + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractFunctionImpl.java:97` + + +Usability patch. Without this, JSON_EXTRACT raises exceptions on any missing fields (which kills extracting from flexibly-typed inputs, one of the major reasons to rely on string types). + +The user-facing error without this on a missing value is java.lang.NullPointerException: Cannot invoke "Object.toString()" because "candidate" is null. + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/ast/tree/SPath.java:54` + + +This is the core behavior, the rest is plumbing/parsing boilerplate. + + +### @penghuo on `docs/user/ppl/cmd/spath.rst:45` + + +results is string? "1" + + +### @penghuo on `docs/user/ppl/cmd/spath.rst:57` + + +What is the expectation of conflict output? 
+ +``` +### +POST {{baseUrl}}/tttt/_bulk +Content-Type: application/x-ndjson + +{ "index": { "_id": 1 } } +{ "@timestamp": "2025-08-25 00:00:01", "id": 1, "nested_out": {"nested_in": "success"} } +{ "index": { "_id": 2 } } +{ "@timestamp": "2025-08-25 00:00:02", "id": 2, "nested_out": {"nested_in": "failed"} } + +### +POST {{baseUrl}}/_plugins/_ppl/ +Content-Type: application/x-ndjson + +{ + "query": "source=tttt | eval doc='{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}' | spath input=doc output=nested_out nest_out.nest_in | fields nested_out.nested_in" +} + + +### +POST {{baseUrl}}/_plugins/_ppl/ +Content-Type: application/x-ndjson + +{ + "query": "source=tttt | eval doc='{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}' | spath input=doc output=nested_out nest_out.nest_in | fields nested_out" +} +``` + + +### @penghuo on `docs/user/ppl/cmd/spath.rst:62` + + +result is string? "[1,2,3,4]" + +"null", and "[]" + + +### @penghuo on `core/src/main/java/org/opensearch/sql/ast/tree/SPath.java:54` + + +It spath is translate to eval, does spath node still required? + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:685` + + +Add a test in CalcitePPLXXXTest, e.g CalcitePPLEvalTest, to verify the PPL query tranlsate SQL. + + +### @Swiddis on `docs/user/ppl/cmd/spath.rst:45` + + +Yep, returns a string -- we can't really have it dynamically choose the type until we have the ability to dynamically make schemas at planning time + + +### @Swiddis on `docs/user/ppl/cmd/spath.rst:62` + + +We could also return `null` instead of `"null"`, but it seems consistent with the whole "stringify into json" rule that the rest of the json_extract method has going on + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/ast/tree/SPath.java:54` + + +We're going to be adding more functionality to it later, so I think it's worthwhile to have the full node here. 
We could probably be more efficient by parsing it directly into an eval at the tree level if it stays as just this step, though. + + +### @Swiddis on `docs/user/ppl/cmd/spath.rst:57` + + +Same as with `eval`, it overwrites the existing field. This seems correct to me + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java:1` + + +Missing license header. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/ast/tree/SPath.java:1` + + +Missing license header + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java:47` + + +nit: maybe add another error case of malformed JSON / invalid paths + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:537` + + +Even input parameter can be multiple? I think it may be worth adding this comment in code. + + +## General Comments + + +### @dai-chen + + +> Please fix all license header as Ryan commented. + +@Swiddis Are all license header fixed? + + +### @Swiddis + + +It was auto-merged + + +### @LantaoJin + + +@Swiddis need to fix the backporting manually. And add the `backport-manually` label when backporting PR is submitted. 
+ + +--- + +# PR #4117: Doc enhancement for eventstats and bin command + +**URL:** https://github.com/opensearch-project/sql/pull/4117 + +**Author:** @ahkcs + +**Created:** 2025-08-22T23:44:19Z + +**State:** MERGED + +**Merged:** 2025-09-04T18:08:17Z + +**Changes:** +11 -1 (3 files) + +**Labels:** `documentation` + + +## Description + +### Description + Enhanced the DISTINCT_COUNT/DC function documentation in eventstats.rst with detailed information about the Cardinality Aggregation + +Also added `fields` in test to ensure the output is consistent across different Java versions + +Also enhanced `bin.rst` file to make it clear that the command is available since 3.3 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @dai-chen on `docs/user/ppl/cmd/eventstats.rst:None` + + +maybe better link to OpenSearch doc? + + +### @ahkcs on `docs/user/ppl/cmd/eventstats.rst:None` + + +Added link + + +### @dai-chen on `docs/user/ppl/cmd/eventstats.rst:None` + + +Sorry, I meant probably we don't need to copy these 2 sections from OS-core doc? + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4116: Dynamic source selector in PPL Grammar. 
+ +**URL:** https://github.com/opensearch-project/sql/pull/4116 + +**Author:** @vamsimanohar + +**Created:** 2025-08-22T23:33:24Z + +**State:** MERGED + +**Merged:** 2025-09-09T20:00:41Z + +**Changes:** +225 -0 (4 files) + +**Labels:** `enhancement`, `backport 2.19-dev`, `v3.3.0` + +**Assignees:** @vamsimanohar + + +## Description + +### Description +Introduces grammar support for dynamic source clause syntax in PPL that allows + specifying metadata filters alongside source references: source=[sourceList, key=value + filters] + + Changes + + - Grammar: Added dynamicSourceClause in fromClause with support for: + - Source references: myindex, logs*, vpc.flow_logs + - Filter arguments: key=value and key IN (values) + - Optional namespace prefixes: namespace:identifier + - Implementation: visitDynamicSourceClause throws UnsupportedOperationException + (execution not implemented) + + Examples +``` + source=[myindex, logs, fieldIndex="httpStatus", count=100] + source=[vpc.flow_logs, region IN ("us-east-1", "us-west-2")] +``` + Design + + - Backward compatible - existing queries unchanged + - Grammar-only support - execution deferred to future implementation + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +q7, q8, q9, q13, q15, q22 in https://github.com/opensearch-project/sql/tree/main/integ-test/src/test/resources/tpch/queries need to be rewritten. 
+ + +### @vamsimanohar + + +@LantaoJin For now I haven't removed subQuery clause and kept the latest changes as last option. + +I have created a new task for remove the subquery clause which is not required: https://github.com/opensearch-project/sql/issues/4255 + +There is no conflict as of now between the two. + + +--- + +# PR #4113: Add asc keyword to sort command + +**URL:** https://github.com/opensearch-project/sql/pull/4113 + +**Author:** @ritvibhatt + +**Created:** 2025-08-22T22:37:39Z + +**State:** MERGED + +**Merged:** 2025-09-02T22:20:33Z + +**Changes:** +176 -4 (6 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +- Adds asc/a keyword as option in sort command, Specifying asc does not change behavior, sort direction of each field remains as specified (ascending as default) +- Adds missing tests to ```CaclitePPlSortIT``` for desc keyword, count, and type casting enhancements + +## Changes + - Added `ASC` and `A` tokens to PPL lexer and parser grammar + - Updated sort command to accept `asc`/`a` keywords alongside existing `desc`/`d` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - COMMENTED + + +Needs doc, otherwise LGTM + + +### @RyanL1997 - COMMENTED + + +Hi @ritvibhatt, thanks for taking this on, and i just left some comments. + + +## Review Comments + + +### @RyanL1997 on `docs/user/ppl/cmd/sort.rst:25` + + +nit: Consider rewording to 'sorts in ascending order' for clarity. + + +### @RyanL1997 on `docs/user/ppl/cmd/sort.rst:22` + + +Do we have these covered in example section? 
+ + +### @RyanL1997 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:161` + + +Should we consider the precedence if someone accidentally specifies both asc and desc? The grammar allows it but behavior might be undefined. + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSortIT.java:239` + + +nit: Consider adding negative test cases: + - Invalid combinations like 'sort field asc desc' + - Edge cases with count=0" + + +### @ritvibhatt on `docs/user/ppl/cmd/sort.rst:25` + + +The behavior for asc is it will keep whatever the specified field sort order was, so for ```sort firstname, -lastname asc``` firstname will still be sorted ascending and lastname will still be sorted descending. Don't know if 'sorts in ascending order' would make that behavior clear + + +### @ritvibhatt on `docs/user/ppl/cmd/sort.rst:22` + + +Yes! just added 'Since 3.3' in this PR but added examples in previous PR + + +### @ritvibhatt on `ppl/src/main/antlr/OpenSearchPPLParser.g4:161` + + +Just checked using both asc and desc will result in an error + + +### @RyanL1997 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:161` + + +got it. Thanks for confirming that. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4110: [Backport 2.19-dev] Allow type checkering on nested & struct fields for isnull, isnotnull and ispresent (#4044) + +**URL:** https://github.com/opensearch-project/sql/pull/4110 + +**Author:** @yuancu + +**Created:** 2025-08-22T09:29:17Z + +**State:** MERGED + +**Merged:** 2025-08-26T09:29:09Z + +**Changes:** +252 -119 (6 files) + +**Labels:** `bug` + + +## Description + +### Description +Backport #4044 to 2.19-dev + +### Commit message +* Redefine type checkers of isnull, isnotnull, and ispresent to IGNORE + + + +* Prevents filtering with isnull/isnotnull conditions on nested fields + + + +* Prevents pushing down isnull(nested) in PredicateAnalyzer to correct pushdown of complex & partial filters containing isnull(nested) + + + +* Test explaining partial pushdown filter with isnull + + + +* Add interface registerOperator(BuiltinFunctionName, SqlOperator, PPLTypeChecker) to allow registering an operator with a designated type chekcer + + + +--------- + + +(cherry picked from commit 284ecc49f2a1a1428658a75d5ae4dc608476133b) + +### Related Issues +Resolves #4004 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4109: [Feature] Core Implementation of `rex` Command In PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4109 + +**Author:** @RyanL1997 + +**Created:** 2025-08-22T09:26:36Z + +**State:** MERGED + +**Merged:** 2025-09-05T22:41:10Z + +**Changes:** +1624 -4 (31 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Core logic implementation of `rex` Command In PPL + +### Details: + - Core logic of `rex` (Extract Mode Only) + ```bash + | [commands] | rex field= "" | [commands] + | [commands] | rex field= mode=extract "" | [commands] + | [commands] | rex field= "" max_match= | [commands] + ``` + - `field`: The field name to extract data from + - `pattern`: Java regex pattern with named capture groups (?...) for field extraction + - `mode=extract`: Extract mode for field extraction (default and only supported mode) + - `max_match`: Maximum number of matches to capture (returns array of matches) + +**Supported Functionality:** +- **Single extraction**: Extract one match per named capture group +- **Multi pattern extraction**: Extract from multiple named capture groups in one pattern +- **Max match**: Use `max_match=N` for multiple occurrences with both single and multiple groups + +**Note**: This implementation focuses on extract mode only. SED mode and offset field functionality have been intentionally excluded to keep this PR focused and manageable. 
+ +For the handling of default field (`_source` / `_raw`): +- Tracking at issue https://github.com/opensearch-project/sql/issues/4111 + +### Related Issues +* Relate #4108 + + +### Related Analysis +* UDF vs Native Calcite Function: https://github.com/opensearch-project/sql/issues/4108#issuecomment-3231105679 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:2` + + +Add since 3.3 + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +text field -> string field. As text has specific meaning in OpenSearch, it will confuse OpenSearch user. + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +Move this sections into limitations, it is clearly state in Java regex doc. + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +Format bellow table, https://github.com/RyanL1997/sql/blob/d31ab7567c0449191c1e68cb9355d2edc08baa48/docs/user/ppl/cmd/rex.rst + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +> automatically filters out events + +IMO, if address value don't match pattern streetnum and streetname should be null, but the entire envent should be keeped. right? + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +> optionally filters + +Is there any paramaters control it? + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +Add explaination of sed syntax. + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +what is offset_field format, comma seperated key=value pair string? 
+ + +### @penghuo on `docs/user/ppl/cmd/rex.rst:181` + + +We can call-out only java regex is supported, and avoid mention Perl and other flavors in multiple place + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +It is not limitation right? + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +What we want to call-out? do we have releated to test-data? + + +### @penghuo on `docs/user/ppl/cmd/rex.rst:None` + + +What we want to call-out in here?, it is not limitation. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java:90` + + +remove CalciteRenameCommandIT + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/RexCommandIT.java:None` + + +what if rex extract group field name conflict with existing field name? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:436` + + +`\\\"` can be simpliy to `\"` + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +Yes, currently is like this: +```bash +# comma separator, pattern order +username=0-12,domain=14-22 +``` + +Do we need to switch to some of the other format like the following ? +```bash +# & separator +domain=14-22&username=0-12 +``` + + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:2` + + +Fixed, added a version section here: https://github.com/opensearch-project/sql/pull/4109/files#diff-1207ed670a85a160ac986d8775072cb8ae114a599f4c076dae3e740c4d5d687aR16 + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +Added. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +nice catch, and i have fixed all the tables in the rst doc + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +This is not a limitation. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +For this section, I was trying to callout some best practice for how to construct the rex command. 
For example, this specific line you highlighted, I mentioned the to use specific patterns when possible (e.g., `"(?[^@]+@[^.]+\.com)"` vs `"(?.*@.*)"`), because anchored patterns reduce unnecessary backtracking in regex engine. + +However, just like you suggested in one of the other comments, we can just let user to reference to the official java regular expression doc for these best practice following tips, since it is more focused on the regular expression usage itself, not the actual rex command. + + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +This is not limitation. I was trying to add some description of potential use case of `rex` command. I have removed this section, for following reason: +- checked the other commands' rst file, none of them has this specific section, and they just directly jump into the examples + +According to the above, I have removed it. Thanks for pointing it out. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +fixed. + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java:90` + + +good catch, fixed. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:181` + + +Fixed, I have removed the wording of: +> "- Pattern must use Java regex syntax, not Perl or other flavors" + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +I have actually removed this "Field Behavior" section from "Limitation", for following reasons: +- First, these are not limitations +- We have covered this content in our "Syntax" section + +According to the above, I have removed them entirely. Thanks for calling it out. + + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +fixed. + + +### @RyanL1997 on `docs/user/ppl/cmd/rex.rst:None` + + +In the current revision, I have removed this "Performance Consideration" section which were related to these regex usage specific tips. 
Instead, I have added a line to reference user to the official java doc for more info: +> - For detailed Java regex pattern syntax and usage, refer to the `official Java Pattern documentation `_ + +at https://github.com/opensearch-project/sql/pull/4109/files#diff-1207ed670a85a160ac986d8775072cb8ae114a599f4c076dae3e740c4d5d687aR194 + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:436` + + +Since the test constructs JSON directly in Java, I need another layer of escaping like this, here is why: + - First `\` escapes the second `\` in Java + - Second `\` escapes the `"` in JSON + - Result: `\\\"` in Java code → `\"` in JSON → `"` when parsed + + I have also done the experiment of changing it to `\"` and it does give me the following 400: +```bash +{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"Failed to parse request payload"}],"type":"illegal_argument_exception","reason":"Failed to parse request payload","caused_by":{"type":"j_s_o_n_exception","reason":"Expected a ',' or '}' at 78 [character 76 line 2]"}},"status":400} +``` + + + + + +### @RyanL1997 on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:1080` + + +Hi @ahkcs , could you review this change to see if it is making sense? After I rebased with you bin cmd change, I need this change since the syntax `field` as been added into the grammar for rex command, so this test instead of returning a illegal argument error, it starts returning antlr error for complained about the grammar usage of this: + +```bash +ArgumentFactoryTest > testBinCommandDuplicateParameter FAILED + java.lang.Exception: Unexpected exception, expected but was + at org.junit.internal.runners.statements.ExpectException.evaluate(ExpectException.java:30) +... +``` +I got the above from a CI run (https://github.com/opensearch-project/sql/actions/runs/17224987607/job/48867804413?pr=4109#step:4:5421) , and I have also verified this by my local testing. 
+ + +## General Comments + + +### @RyanL1997 + + +Btw, for better readability, Im currently working on splitting this PR into separate parts: +- Part 1: Extract (single pattern / multi pattern) + max_match +- Part 2: Sed Mode + Offset Field + +Turning it into draft just for now. + + +### @ahkcs + + +This branch currently has merge conflicts + + +--- + +# PR #4107: FollowUp: Disable GCedMemoryUsage if no concurrent GC MXBean + +**URL:** https://github.com/opensearch-project/sql/pull/4107 + +**Author:** @LantaoJin + +**Created:** 2025-08-22T09:00:05Z + +**State:** MERGED + +**Merged:** 2025-08-26T02:51:51Z + +**Changes:** +49 -23 (3 files) + +**Labels:** `bug` + + +## Description + +### Description +https://github.com/opensearch-project/sql/pull/3983 enables `GCedMemoryUsage` for Calcite. But when the Java version is less than 21, there is no `ConcurrentMXBean` returned from `ManagementFactory.getGarbageCollectorMXBeans()`. Fallback it to `RuntimeMemoryUsage`. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ResourceMonitorIT.java:None` + + +The original test might not trigger GC in single run. Use ClickBench Q30 and execute it 10 times to make sure some CGC could be triggered. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/ResourceMonitorIT.java:None` + + +Does this mean the new solution is less aggressive toward failed queries compared to a real memory check? 
+And in both case, CircuitBreaker still not triggered? Does it means our solution is still aggregesive? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:65` + + +Did we get runtime exception on 2.19-dev branch? Which JDK version this feature depend on? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:65` + + +Haven't backported to 2.19-dev. I am still working on backporting https://github.com/opensearch-project/sql/pull/4105. Then the JDK version < 20, there is no specific `G1 Concurrent GC` Monitoring registered, ref https://github.com/openjdk/jdk/blob/jdk-19-ga/src/hotspot/share/gc/g1/g1MonitoringSupport.cpp#L91. From JDK 20, `G1 Concurrent GC` was registered in Monitoring, ref https://github.com/openjdk/jdk/blob/jdk-20-ga/src/hotspot/share/gc/g1/g1MonitoringSupport.cpp#L93. + +This PR is to verify if there a Concurrent MXBean registered in JDK before initialing a GCedMemoryUsage. + + +## General Comments + + +### @LantaoJin + + +No need to backport to 2.19-dev. The fixing is included in https://github.com/opensearch-project/sql/pull/4105 + + +--- + +# PR #4106: strftime function implementation + +**URL:** https://github.com/opensearch-project/sql/pull/4106 + +**Author:** @vamsimanohar + +**Created:** 2025-08-22T05:40:14Z + +**State:** MERGED + +**Merged:** 2025-09-15T23:58:22Z + +**Changes:** +1519 -19 (17 files) + +**Labels:** `enhancement`, `backport 2.19-dev`, `v3.3.0` + +**Assignees:** @vamsimanohar + + +## Description + +### Summary + + Implements the STRFTIME function for OpenSearch SQL PPL with Calcite engine, providing + timestamp formatting functionality. + + #### Description + + This PR adds the STRFTIME function that formats Unix timestamps into human-readable + date/time strings using format specifiers. 
+ + #### Key Features + + - Unix Timestamp Support: Accepts Unix timestamps in seconds + - Extended Support: + - Handles millisecond timestamps (auto-detected when value > 100000000000) + - Supports nanosecond precision through floating-point seconds + - Works with PPL timestamp types (from now(), from_unixtime(), etc.) + - Type Safety: Explicitly accepts only NUMERIC and TIMESTAMP types + - String inputs are NOT supported (use unix_timestamp() for conversion) + + #### Implementation Details + + - Input Types Supported: + - INTEGER/LONG: Unix timestamps in seconds or milliseconds (auto-detected) + - DOUBLE/FLOAT: Unix timestamps with fractional seconds + - TIMESTAMP: From functions like now(), from_unixtime(), timestamp() + - Format Specifiers: Full support for standard strftime format codes + - Date: %Y (year), %m (month), %d (day), etc. + - Time: %H (hour), %M (minute), %S (second), etc. + - Shortcuts: %F (YYYY-MM-DD), %T (HH:MM:SS), etc. + + #### Usage Examples + +``` + -- Format Unix timestamp + source=logs | eval formatted = strftime(1521467703, "%Y-%m-%d %H:%M:%S") + -- Result: "2018-03-19 13:55:03" +``` + +``` + -- Format with milliseconds + source=logs | eval formatted = strftime(1521467703123, "%F %T") + -- Result: "2018-03-19 13:55:03" +``` + +``` + -- Format current time + source=logs | eval current = strftime(now(), "%B %d, %Y") + -- Result: "September 02, 2025" +``` + +``` + -- Convert string date first (strings not directly supported) + source=logs | eval ts = unix_timestamp("2018-03-19 13:55:03") + | eval formatted = strftime(ts, "%m/%d/%Y") + -- Result: "03/19/2018" +``` + + #### Notes + + - All timestamps are interpreted as UTC timezone + - Text formatting uses language-neutral Locale.ROOT for consistency + - Calcite may attempt implicit type conversion for string literals, but the function + itself only accepts NUMERIC and TIMESTAMP types + + +### Related Issues +Resolves #[st] + + +### Check List +By submitting this pull request, I confirm that 
my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java:196` + + +Is this necessary since we're enabling Calcite by default in 3.3? I assume there are many functions only work with Calcite? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/StrftimeFunction.java:39` + + +Does our existing V2 function can help simplify here? e.g, https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/datetime.rst#date-format, https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/datetime.rst#from-unixtime + + +### @dai-chen on `integ-test/src/test/resources/expectedOutput/calcite/explain_strftime_function.json:3` + + +np: do we need such test for UDF? + + +### @noCharger on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/StrftimeFunction.java:183` + + +Could this cause NPE if the result is null? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/StrftimeFunction.java:39` + + +Date_format uses mysql style identifiers and +strftime time uses POSIX style. +Do you see a reuse? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java:196` + + +It is part of checklist written to send out the same message for all these calcite only functions. + + +### @vamsimanohar on `integ-test/src/test/resources/expectedOutput/calcite/explain_strftime_function.json:3` + + +Not really, these are also part of checklist. 
+ + +### @noCharger on `core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java:None` + + +this logic will not handle large negative millisecond timestamp + + +### @noCharger on `core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java:176` + + +``` + public static int parseInt(String s) throws NumberFormatException { + return parseInt(s, 10); + } +``` + +Do we need to handle exception here? + + +### @noCharger on `core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java:None` + + +Is it necessary to use StringBuffer instead of StringBuilder here? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/StrftimeFunction.java:183` + + +Only if dataTime is null and it doesn't happen as there enough null checks in the calling methods. + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java:176` + + +Not a checked exception right, let it bubble to the customer. + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java:None` + + +sure + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java:None` + + +Good one..although the above function is used only in unit tests...we are n't handling negative timestamps properly. Will make that change. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/StrftimeFunction.java:39` + + +what's the difference? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java:196` + + +Okay, I wasn't aware of that. So we need to add all Calcite only functions to the list you created? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java:196` + + +I checked again, its only for commands. 
https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md +``` +Unsupported in v2 test: +Add a test in NewAddedCommandsIT +``` + +I think we can skip for functions. I thought its good to have. + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/StrftimeFunction.java:39` + + + + +``` +✅ Specifiers unique to MySQL + +%D → Day of month with English suffix (1st, 2nd, 3rd…). + +%h → Hour (01–12) (alias for %I in MySQL). + +%i → Minutes (00–59). + +%r → 12-hour time with AM/PM (hh:mm:ss AM/PM). + +%U / %u / %V / %v / %X / %x → MySQL’s special week number / year specifiers tied to WEEK() function modes. + +%M → Full month name (January..December). + +⚠️ In POSIX, %M = minutes, not month. + +%c → Month numeric (0–12). + +⚠️ In POSIX, %c = full date/time string. + +%k → Hour (0–23, no leading zero). + +%l → Hour (1–12, no leading zero). + +%s → Seconds (00–59). + +⚠️ In POSIX, %s = Unix epoch timestamp. + +%a → Abbreviated weekday (Sun..Sat). + +⚠️ In POSIX, %a = abbreviated weekday too, but starts with Monday (Mon..Sun). +``` + +``` +✅ Specifiers unique to POSIX/Java + +(not present in MySQL) + +%A → Full weekday name (Monday..Sunday). + +%B → Full month name (January..December). + +%C → Century (two digits). + +%E z → Timezone offset in minutes from UTC. + +%F → ISO 8601 date (YYYY-MM-DD). + +%g / %G → ISO week-based year (2-digit / 4-digit). + +%N → Subsecond digits with precision (nanoseconds). + +%Q → Fractional seconds with precision (default ms). + +%s → Unix epoch seconds. + +%x → Date (MM/dd/yyyy format). + +%X → Time (HH:mm:ss format). + +%z / %:z / %::z / %:::z → Timezone offset formats. + +%Z → Timezone abbreviation (EST, PDT). 
+``` + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4105: [Backport 2.19-dev] ResourceMonitor only checks the memory health by calculating the memory usage after GC of old gen in v3 (#3983) + +**URL:** https://github.com/opensearch-project/sql/pull/4105 + +**Author:** @LantaoJin + +**Created:** 2025-08-22T04:14:40Z + +**State:** MERGED + +**Merged:** 2025-08-25T16:04:32Z + +**Changes:** +424 -120 (21 files) + + +## Description + +(cherry picked #3983 from commit 0398f6852bd3aed3c79b88c2885934cf18e6f71b) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4104: [Backport 2.19-dev] SUM aggregation enhancement on operations with literal (#3971) + +**URL:** https://github.com/opensearch-project/sql/pull/4104 + +**Author:** @qianheng-aws + +**Created:** 2025-08-22T04:06:27Z + +**State:** MERGED + +**Merged:** 2025-08-25T03:14:59Z + +**Changes:** +564 -5 (11 files) + + +## Description + +* SUM aggregation enhancement on operations with literal + + + +* Fix CI + + + +* Keep ignoring q30 for Calcite + + + +* Add UT for PPLAggregateConvertRule + + + +* Add UT for PPLAggregateConvertRule + + + +* Spotless check + + + +--------- + + + +(cherry picked from commit 892355190687033b0f29bae624182c1ba204bf96) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4103: Support `count(eval)` expression with stats command + +**URL:** https://github.com/opensearch-project/sql/pull/4103 + +**Author:** @dai-chen + +**Created:** 2025-08-21T22:48:27Z + +**State:** MERGED + +**Merged:** 2025-08-28T20:48:42Z + +**Changes:** +307 -13 (7 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces support for `count(eval(condition))` function in PPL `stats` command which enables filtered counting capability. Pushdown optimization and additional support for `distinct_count(eval)` and `eventstats` command (low priority) will be worked on next. Please find more details in issue below: https://github.com/opensearch-project/sql/issues/3949#issuecomment-3202204869. + +Key implementation decisions: + +1. `count(eval(...))` is rewritten as `count(CASE WHEN ... THEN 1 ELSE NULL END)`. +2. Only the `count` aggregation is supported (`distinct_count` is planned next). Support for other aggregation functions may be added in the future if the semantic is clear. + +### Related Issues +Resolves (partially) https://github.com/opensearch-project/sql/issues/3949 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - DISMISSED + + +Current solution push down count(eval(p)) as script expression? we plan to agg-filter push down in future PR? + + +### @RyanL1997 - APPROVED + + +Hi @dai-chen , thanks for taking this on. 
LGTM and I just left a question. + + +## Review Comments + + +### @RyanL1997 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:288` + + +Since we are introducing a dedicated `evalExpression` rule and wiring it into `count`, just for my knowledge, could this potentially be reused for `distinct_count(eval(...))` or other aggregates? + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:288` + + +Yes! I'm thinking reuse this in follow-up PR for `distinct_count`. For other aggregation, the semantic of PPL is not clear, e.g., sum(eval), max(eval) is not documented but also supported in SPL. So here I only enable this for counting function first instead of enabling it in `valueExpression` for any aggregation function. + + +### @RyanL1997 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:288` + + +Got it! Thanks for confirming that. This change is LGTM. + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java:107` + + +nice! + + +## General Comments + + +### @dai-chen + + +> Current solution push down count(eval(p)) as script expression? we plan to agg-filter push down in future PR? + +I think filtered aggregation pushdown is reverted in PR https://github.com/opensearch-project/sql/pull/4002. I'm working on a follow up PR to reenable it correctly. 
+ + +--- + +# PR #4102: [Backport 2.19-dev] `fields` Command Enhancement - Advanced Field Selection Features (Cal… + +**URL:** https://github.com/opensearch-project/sql/pull/4102 + +**Author:** @aalva500-prog + +**Created:** 2025-08-21T22:32:43Z + +**State:** MERGED + +**Merged:** 2025-08-22T03:37:12Z + +**Changes:** +2303 -116 (18 files) + + +## Description + +Backport [6bbd8db](https://github.com/aalva500-prog/sql/commit/6bbd8db5c0d1c90fc25a33373fffc95b9f34cde6) from #3970 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4101: Support for == and like operator in where and eval clauses. + +**URL:** https://github.com/opensearch-project/sql/pull/4101 + +**Author:** @vamsimanohar + +**Created:** 2025-08-21T21:45:28Z + +**State:** MERGED + +**Merged:** 2025-08-26T17:37:11Z + +**Changes:** +341 -2 (7 files) + +**Labels:** `enhancement`, `backport 2.19-dev`, `v3.3.0` + +**Assignees:** @vamsimanohar + + +## Description + +### Description + + ### 1. == Operator + + Add support for == operator as an alternative to = for comparisons in PPL. For clarity, == is only not an alternative for assignment expressions. + + What this PR does: + - Introduces == as an alternative equality operator in PPL (Piped Processing Language) while maintaining backward compatibility + - Both = and == operators are functionally identical for equality comparisons in PPL + + Implementation: + - Added DOUBLE_EQUAL token to PPL lexer and parser grammar files + - PPL parser maps == to the internal = function to maintain consistency with existing infrastructure + - SQL remains unchanged - continues using only = operator + + Key Features: + - ✅ Backward compatible - all existing PPL queries with = continue to work + - ✅ Can mix both operators in the same query: where age = 32 AND state == 'TN' + - ✅ Works in all PPL contexts: WHERE, EVAL, stats, etc. 
+ + Testing: + - Added comprehensive unit tests for parser and expression builder + - All tests passing for both legacy and Calcite engines + + Example Usage: +``` + source=accounts | where age == 32 + source=accounts | where age == 32 AND status = 'active' // Mix both operators + source=accounts | eval is_match = (age == 32) + +``` + + ### 2. like Operator + +Introduces LIKE as an infix operator in PPL, providing SQL-style pattern matching syntax as an alternative to the existing like() function. + + Key Changes: + + 1. Grammar Updates: + - Added LIKE to the comparisonOperator rule in PPL parser + - Allows syntax like field LIKE 'pattern' instead of only like(field, 'pattern') + + 2. Implementation: + - Case-insensitive handling: The operator correctly handles LIKE, like, Like etc., mapping all variations to the lowercase "like" function internally + - Uses equalsIgnoreCase() for the LIKE operator since it's a keyword that can be written in any case + - Properly integrated with existing comparison operator. + + 3. Pattern Matching Support: + - Supports SQL wildcards: _ (single character) and % (multiple characters) + - Example: firstname LIKE 'A%' matches all names starting with 'A' + + Example Usage: +``` + + // Old function syntax (still supported) + source=accounts | where like(firstname, 'Ambe_') +``` + +``` + // New operator syntax (added by this commit) + source=accounts | where firstname LIKE 'Ambe_' + source=accounts | where firstname like 'A%' // Case-insensitive + source=accounts | where firstname Like 'J_hn' // Mixed case works + +``` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +The `like` operator is a massive win, nice! + + +## Review Comments + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java:None` + + +As we plan to improve the search command to support full-text search, how can we differentiate whether a term is a field or a keyword the user wants to search? + + +### @vamsimanohar on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java:None` + + +Good catch. When we change the grammar in the upcoming PR for free text grammar, this test will fail. +I will proactively remove it now. + + +### @vamsimanohar on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java:None` + + +Changed tests. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4100: Add EARLIEST/LATEST aggregate functions for PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4100 + +**Author:** @ykmr1224 + +**Created:** 2025-08-21T20:32:14Z + +**State:** MERGED + +**Merged:** 2025-08-28T22:42:54Z + +**Changes:** +1130 -436 (26 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + +- Implements `EARLIEST` and `LATEST` aggregate functions for PPL using ARG_MIN/ARG_MAX. +- Uses the `@timestamp` field to decide the earliest/latest by default. +- It accepts an optional argument to specify the timestamp field to be used. +- Pushdown for OpenSearch will be implemented in a separate PR. +- Fixed doctest to enable Calcite. +- Refactored test_docs.py and enabled Calcite enabled tests separately from existing Calcite disabled cases. 
+ +### Usage + +```ppl +# Basic usage +source=logs | stats earliest(message), latest(response) by host + +# Custom time field +source=metrics | stats latest(cpu_usage, event_time) by server +``` + +### Related Issues +- https://github.com/opensearch-project/sql/issues/3639 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Could you also check if it's possible to enable both for `eventstats` command too? Maybe in follow-up PR. + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/stats.rst:None` + + +3.3.0? + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Why remove comments? Mistake? + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Is it related to this PR? + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEarliestLatestTest.java:None` + + +What is the difference between testEarliestFunction and testEarliestWithCustomTimeField? + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEarliestLatestTest.java:None` + + +Add a test using the default timestamp field? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:None` + + +missing explain_earliest_latest.json in PR? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java:None` + + +We should also verify data + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/MaxByAggFunction.java:None` + + +Can Calcite ARG_MAX/MAX_BY be reused? 
+ + +### @ykmr1224 on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Fixed. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/udf/udaf/MaxByAggFunction.java:None` + + +That's a good point. Used ARG_MAX/ARG_MIN instead. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java:None` + + +Removed this test case as this function is tested in another test. + + +### @penghuo on `integ-test/src/test/resources/expectedOutput/ppl/explain_earliest_latest.json:None` + + +earliest/latest are only for Calcite; do they also work in V2? + + +### @ykmr1224 on `integ-test/src/test/resources/expectedOutput/ppl/explain_earliest_latest.json:None` + + +Oh, thanks for catching it! I didn't notice this one was left. + + +### @ykmr1224 on `doctest/test_docs.py:24` + + +Did major refactoring on this file, and enabled separate test for Calcite enabled cases. + + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEarliestLatestTest.java:110` + + +Not sure if this is a bug. I only see min/max_by in SparkSQL. + + +### @ykmr1224 on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEarliestLatestTest.java:110` + + +Ah, I will address this in the next PR. + + +## General Comments + + +### @ykmr1224 + + +Checking doctest failure. + + +### @ykmr1224 + + +> Could you also check if it's possible to enable both for `eventstats` command too? Maybe in follow-up PR. + +Sure, let me address in a follow-up PR. 
+ + +### @ykmr1224 + + +backport blocker: https://github.com/opensearch-project/sql/pull/4094 + + +--- + +# PR #4098: [Backport 2.19-dev] Filter expression with `OR isnull(x)` should be pushed down (#4055) + +**URL:** https://github.com/opensearch-project/sql/pull/4098 + +**Author:** @LantaoJin + +**Created:** 2025-08-21T13:36:58Z + +**State:** MERGED + +**Merged:** 2025-08-22T03:37:30Z + +**Changes:** +104 -4 (10 files) + + +## Description + +(cherry picked from #4055 commit fce8383d56444092eed01b01b889fc6e4be4e810) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4096: [Backport 2.19-dev] [PPL] Patterns command add UUID regex into log template parsing + +**URL:** https://github.com/opensearch-project/sql/pull/4096 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-21T09:11:13Z + +**State:** MERGED + +**Merged:** 2025-08-21T10:24:13Z + +**Changes:** +38 -0 (3 files) + + +## Description + +Backport 4a3b03a2a0fd23f71e93d9ef91a579005b3dfc86 from #3989. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Flaky failure not related. + + +--- + +# PR #4095: Fix value parsing bug. + +**URL:** https://github.com/opensearch-project/sql/pull/4095 + +**Author:** @ishaoxy + +**Created:** 2025-08-21T07:35:25Z + +**State:** MERGED + +**Merged:** 2025-08-26T02:18:28Z + +**Changes:** +106 -7 (2 files) + +**Labels:** `bug`, `backport 2.19`, `backport 2.19-dev` + + +## Description + +### Description +**Problem:** +Certain fields in JSON content were not correctly parsed when numeric or boolean values were represented as strings. 
+ +**What I did:** +Added `parseLongValue`, `parseDoubleValue`, and `parseBooleanValue` methods to safely handle: + +- Numeric strings (e.g., "123" → 123) + +- Boolean strings ("true" → true; "false" → false) + +**Special Handling:** +Empty string (`""`) is parsed as: + - **0** for numeric types (long, int, double, etc.) + + - **false** for boolean type + +### Related Issues +Resolves #3001 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java:190` + + +Shall we throw a legible error if the text cannot be parsed as double? + +If it already does so, please add such a test. + + +### @ishaoxy on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java:190` + + +The scheme restricts the written value to be able to be parsed into the specific type, otherwise an error will be reported. Therefore, this part of the code will definitely receive a field that can be parsed into a double type. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java:174` + + +Can you also include such cases in the tests? + + +### @ishaoxy on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java:174` + + +Sure, added. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4094: Add documents on how to develop a UDF / UDAF + +**URL:** https://github.com/opensearch-project/sql/pull/4094 + +**Author:** @yuancu + +**Created:** 2025-08-21T07:23:19Z + +**State:** MERGED + +**Merged:** 2025-08-28T03:48:12Z + +**Changes:** +442 -245 (12 files) + +**Labels:** `documentation`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +This PR creates a dev doc *Developing PPL Functions* under folder docs/dev. + +Additionally, it refactors the registration of aggregation functions to simplify its logic, making it similar to the creation and registration of non-aggregation functions to reduce learning barriers. + +### Related Issues +Resolves #4043 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +~~why this PR includes code refactoring?~~ + + +## Review Comments + + +### @LantaoJin on `DEVELOPER_GUIDE.rst:None` + + +This will break the [link](https://github.com/opensearch-project/sql/blob/fcdb7882132442e8ad88c15535f3e82f7d979264/.github/PULL_REQUEST_TEMPLATE.md?plain=1#L13). + +I think you can add a new section outside of `New PPL Command Checklist` + + +### @yuancu on `DEVELOPER_GUIDE.rst:None` + + +I thought it was too heavy to have two sections for developing functions and commands, considering most of the other sections are high-level instructions. + +But somehow I cannot add lower level sections to this document. I updated them to separate sections. 
+ + +### @yuancu on `DEVELOPER_GUIDE.rst:None` + + +Modified by LLM unintentionally, should be ok though + + +### @qianheng-aws on `DEVELOPER_GUIDE.rst:None` + + +Please call out that developers should ensure their UDF calculating logic is efficient by implementing data-irrelevant logic during the compilation phase rather than at runtime. In other words, that part of logic should preferably use linq4j's expression instead of internal static method calls. + + +### @LantaoJin on `DEVELOPER_GUIDE.rst:None` + + +Is the new description really better than the original? + +The LLM generated content changed the level of requirements. Some info I tried to callout is missing. For example, offline meeting is not necessary, syntax, usage and examples are minimal requirements of RFC, implementation details are not necessary in RFC, etc. + +Can you revert the LLM generated suggestions except grammar fixing? + + +### @yuancu on `DEVELOPER_GUIDE.rst:None` + + +Sorry for the unnecessary change. I have reverted them. + + +### @yuancu on `DEVELOPER_GUIDE.rst:None` + + +Thanks. I added such comments. I'm thinking about adding an example so that it will be easier to comprehend. Does the following example fit the case? + +```java +@Override +public Expression implement(RexToLixTranslator translator, RexCall call, List translatedOperands) { + // Don't + SqlTypeName type = call.getOperands().getFirst().getType().getTypeName(); + List operands = Lists.concat(translatedOperands, List.of(Expressions.constant(type))); + return Expressions.call(Implementor.class, "allInOneOp", operands) + + // Do + String opName = type == FLOAT ? "floatOp" : "integerOp"; + return Expressions.call(Implementor.class, opName, translatedOperands); +} + +public static int allInOneOp(Number n, SqlTypeFamily t){ + if (t == FLOAT) { ... } + else if (t == INTEGER) { .... } +} + +public static int floatOp(float n){ + ... +} + +public static int integerOp(integer n) { + ... 
+} +``` + + +## General Comments + + +### @Swiddis + + +This should go in the dev docs at `docs/dev` instead of being put directly in the developer guide. + +Dev guide should be more high level on how to build the project and maybe what the core entrypoints are. Making this its own doc will also give more room to add detail in e.g. lines like "Creating UDFs: A user-defined function is an instance of `SqlOperator`" (What is `SqlOperator`?) + + +### @yuancu + + +> This should go in the dev docs at `docs/dev` instead of being put directly in the developer guide. +> +> Dev guide should be more high level on how to build the project and maybe what the core entrypoints are. Making this its own doc will also give more room to add detail in e.g. lines like "Creating UDFs: A user-defined function is an instance of `SqlOperator`" (What is `SqlOperator`?) + +That makes sense. I have moved both the guidance for commands and functions from DEVELOPER_GUIDE to docs/dev + + +### @ykmr1224 + + +> @yuancu +Should we backport this to 2.19-dev? +It is causing backport failure for https://github.com/opensearch-project/sql/pull/4100 + + +--- + +# PR #4093: [Backport 2.19-dev] Make fields optional parameter in multi field relevance function. + +**URL:** https://github.com/opensearch-project/sql/pull/4093 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-21T05:36:18Z + +**State:** MERGED + +**Merged:** 2025-08-21T09:29:16Z + +**Changes:** +715 -76 (25 files) + + +## Description + +Backport 5c0ed0dfd06926b90350b06791af1be79f9a1960 from #4018. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4092: Add regex_match function for PPL with Calcite engine support + +**URL:** https://github.com/opensearch-project/sql/pull/4092 + +**Author:** @vamsimanohar + +**Created:** 2025-08-21T05:16:01Z + +**State:** MERGED + +**Merged:** 2025-08-26T17:37:19Z + +**Changes:** +465 -2 (11 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `v3.3.0` + +**Assignees:** @vamsimanohar + + +## Description + +This commit introduces the `regex_match` function for PPL (Piped Processing Language) that enables regular expression pattern matching against strings. The function returns TRUE if the regex pattern finds a match against any substring of the input string, FALSE otherwise. + +## Key Changes: + +### Core Implementation: +- Added `REGEX_MATCH` to BuiltinFunctionName enum +- Registered function in PPLFuncImpTable mapping to Calcite's REGEXP_CONTAINS operator +- Function signature: `regex_match(string, pattern) -> boolean` + +### Documentation: +- Added complete documentation in docs/user/ppl/functions/string.rst +- Includes usage description and examples for IP validation, substring matching, and email validation + +## Examples: + +```ppl +# Filter employees with names ending in 'ES' +source=employees | where regex_match(name, 'ES$') | fields name + +# IP address validation +source=logs | eval is_ip = regex_match(field, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') + +# Use in conditional expressions +source=data | eval result = if(regex_match(test, "pattern"), 1, 0) +``` + +## Technical Details: +- Uses Java regular expression syntax for patterns +- Maps to Calcite's REGEXP_CONTAINS for SQL generation +- Optimized for Calcite engine only (not implemented for legacy engine) +- Supports all standard regex features including lookarounds, 
boundaries, and flags + + +### Related Issues + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - APPROVED + + +LGTM + + +## Review Comments + + +### @penghuo on `docs/user/ppl/functions/condition.rst:441` + + +Add since 3.3 + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java:24` + + +Add an IT for explain. Does it use script pushdown? + + +### @vamsimanohar on `docs/user/ppl/functions/condition.rst:441` + + +added + + +### @vamsimanohar on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java:24` + + +Added. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java:None` + + + @LantaoJin Should add Explain to CalciteExplainIT? https://github.com/opensearch-project/sql/blob/main/DEVELOPER_GUIDE.rst. + + +### @vamsimanohar on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java:None` + + +Sure, I am missing a few other things. Will add them + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java:63` + + +nit: unrelated change + + +## General Comments + + +### @LantaoJin + + +@yuancu you are working on https://github.com/opensearch-project/sql/pull/4094, could you help to review this PR? + + +### @vamsimanohar + + +https://github.com/opensearch-project/sql/issues/4114 +Will take DSL pushdown as a follow-up. 
+ + +--- + +# PR #4091: FollowUp: Retry annotation should work on class level + +**URL:** https://github.com/opensearch-project/sql/pull/4091 + +**Author:** @LantaoJin + +**Created:** 2025-08-21T03:42:13Z + +**State:** MERGED + +**Merged:** 2025-08-22T04:01:15Z + +**Changes:** +8 -1 (2 files) + +**Labels:** `flaky-test`, `testing` + + +## Description + +### Description +This is a followup fixing of https://github.com/opensearch-project/sql/pull/4060. With the commit 95f32d0d8ea5e67abdb27523820ddca42b960538, the retry annotation cannot work on class level. + +The testQ7 could still fail 3 times with socket timeout, ignore it in macOS. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +No need to backport to 2.19-dev since the fixing already included in https://github.com/opensearch-project/sql/pull/4089 + + +--- + +# PR #4090: fix: Update System.env syntax for Gradle 9 compatibility + +**URL:** https://github.com/opensearch-project/sql/pull/4090 + +**Author:** @dbwiddis + +**Created:** 2025-08-21T01:57:03Z + +**State:** MERGED + +**Merged:** 2025-08-28T16:02:21Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `maintenance` + + +## Description + +This PR updates the Gradle build files to fix compatibility with Gradle 9. + +## Problem +The current syntax using `$System.env.VARIABLE_NAME` for accessing environment variables is not compatible with Gradle 9. 
+ +This format is used in this repository for Sonatype credentials for SNAPSHOT publication. This fails under Gradle 9. +```gradle +credentials { + username = "$System.env.SONATYPE_USERNAME" + password = "$System.env.SONATYPE_PASSWORD" +} +``` + +See [example failed workflow here](https://github.com/opensearch-project/opensearch-remote-metadata-sdk/actions/runs/17023398832/job/48255946880#step:5:74). + +## Solution +Update to use the `System.getenv()` method instead: +```gradle +credentials { + username = System.getenv("SONATYPE_USERNAME") + password = System.getenv("SONATYPE_PASSWORD") +} +``` + +Example fix which restored successful snapshots: https://github.com/opensearch-project/opensearch-remote-metadata-sdk/pull/245 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4089: [Backport 2.19-dev] Add flaky retry on CalcitePPLTpchIT + +**URL:** https://github.com/opensearch-project/sql/pull/4089 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-21T00:27:37Z + +**State:** MERGED + +**Merged:** 2025-08-21T15:49:00Z + +**Changes:** +79 -1 (4 files) + + +## Description + +Backport be3874038142dec6e8319d19344c0bc797ac15a1 from #4060. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +I am looking into the failures; they should be retried with the fix. + + +### @LantaoJin + + +> I am looking into the failures; they should be retried with the fix. + +It seems to be a bug introduced by 95f32d0d8ea5e67abdb27523820ddca42b960538, I will fix it. + + +### @LantaoJin + + +The retry mechanism works, but the q7 failed with socket timeout 3 times in retry. Ignore it in macOS. 
+ + +--- + +# PR #4088: disable commit-history uploading to maven + +**URL:** https://github.com/opensearch-project/sql/pull/4088 + +**Author:** @ahkcs + +**Created:** 2025-08-20T23:44:19Z + +**State:** MERGED + +**Merged:** 2025-08-21T13:19:31Z + +**Changes:** +14 -0 (1 files) + +**Labels:** `infrastructure` + + +## Description + +## Description +Disable commit-history file creation in snapshot publishing workflow to comply with Maven Central repository restrictions. + +## Why this change is needed +Maven Central Sonatype repository enforces strict rules about the types of files that can be uploaded to their snapshot repository. + +Our custom commit-history-*.json files are being rejected by Maven Central as they are non-standard artifacts + +What still works + +- Snapshot artifacts are published normally +- Commit IDs are still injected into maven-metadata.xml files for traceability +- All standard Maven repository operations continue to function + +What is disabled + +- Creation of commit-history JSON files in the artifact structure +- Upload of commit mapping files to the repository + +Tested on #4087 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4087: disable commit-history + +**URL:** https://github.com/opensearch-project/sql/pull/4087 + +**Author:** @ahkcs + +**Created:** 2025-08-20T23:37:48Z + +**State:** MERGED + +**Merged:** 2025-08-20T23:39:08Z + +**Changes:** +2 -2 (2 files) + + +## Description + +## Description +Disable commit-history file creation in snapshot publishing workflow to comply with Maven Central repository restrictions. + +## Why this change is needed +Maven Central Sonatype repository enforces strict rules about the types of files that can be uploaded to their snapshot repository. 
+ +Our custom commit-history-*.json files are being rejected by Maven Central as they are non-standard artifacts + +What still works + +- Snapshot artifacts are published normally +- Commit IDs are still injected into maven-metadata.xml files for traceability +- All standard Maven repository operations continue to function + +What is disabled + +- Creation of commit-history JSON files in the artifact structure +- Upload of commit mapping files to the repository + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4086: disable commit-history + +**URL:** https://github.com/opensearch-project/sql/pull/4086 + +**Author:** @ahkcs + +**Created:** 2025-08-20T23:17:06Z + +**State:** MERGED + +**Merged:** 2025-08-20T23:30:31Z + +**Changes:** +20 -3 (1 files) + + +## Description + +## Description +Disable commit-history file creation in snapshot publishing workflow to comply with Maven Central repository restrictions. + +## Why this change is needed +Maven Central Sonatype repository enforces strict rules about the types of files that can be uploaded to their snapshot repository. 
+ +Our custom commit-history-*.json files are being rejected by Maven Central as they are non-standard artifacts + +What still works + +- Snapshot artifacts are published normally +- Commit IDs are still injected into maven-metadata.xml files for traceability +- All standard Maven repository operations continue to function + +What is disabled + +- Creation of commit-history JSON files in the artifact structure +- Upload of commit mapping files to the repository + +Tested on #1265 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4085: PPL percentile function shortcut `perc()` and `p()` support + +**URL:** https://github.com/opensearch-project/sql/pull/4085 + +**Author:** @aalva500-prog + +**Created:** 2025-08-20T21:12:07Z + +**State:** MERGED + +**Merged:** 2025-08-26T17:43:27Z + +**Changes:** +365 -32 (8 files) + +**Labels:** `enhancement`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Currently, PPL lacks percentile shortcut functions `perc()` and `p()`. The purpose of this PR is to support these shortcuts and rewrite them to `percentile(, )`. + +### Related Issues +Resolves #[4051](https://github.com/opensearch-project/sql/issues/4051) + + +### Check List +- [✅] New functionality includes testing. +- [✅] New functionality has been documented. +- [✅] New functionality has javadoc added. +- [✅] New functionality has a user manual doc added. +- [✅] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/DEVELOPER_GUIDE.rst#new-ppl-command-checklist) all confirmed. +- [✅] Commits are signed per the DCO using `--signoff` or `-s`. +- [✅] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). 
+ +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +## Review Comments + + +### @dai-chen on `docs/user/dql/aggregations.rst:None` + + +we should also mention the valid range from 0 to 100? + + +### @dai-chen on `docs/user/dql/aggregations.rst:414` + + +np: maybe change one of the example to decimal like 99.5 + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +Is it possible to reuse `DECIMAL_LITERAL` for the `[0-9]+ ('.' [0-9]+)?` part? Any difference? + +I recall this has to precede ID right? Could you also add to comment? + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +np: `percentile` seems missing in `AstDSL` but there is `AstDSL.doubleLiteral` API. + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:277` + + +Could you cover this in test as well? + + +### @aalva500-prog on `docs/user/dql/aggregations.rst:None` + + +Sure, I'll do that, thanks! + + +### @aalva500-prog on `docs/user/dql/aggregations.rst:414` + + +Sure, I'll do that, thanks! + + +### @aalva500-prog on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +sure, I can do that, thanks! + + +### @aalva500-prog on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:277` + + +sure, I can do that, thanks! + + +### @aalva500-prog on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +You're absolutely right! The visitPercentileShortcutFunctionCall method should use AstDSL.doubleLiteral() instead of creating a new literal. Good catch, thanks! 
+ + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +cannot simplify to `PERC(INTEGER_LITERAL | DECIMAL_LITERAL) | P(INTEGER_LITERAL | DECIMAL_LITERAL)`? we have set `caseInsensitive = true` in lexer's option + + +### @LantaoJin on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:64` + + +unused imports? + + +### @aalva500-prog on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:None` + + +@LantaoJin I can do something like this `PERC(INTEGER_LITERAL | DECIMAL_LITERAL) | 'P'(INTEGER_LITERAL | DECIMAL_LITERAL);` with `P` as a string literal to avoid token conflicts while still benefiting from case-insensitive matching. + + +### @aalva500-prog on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:64` + + +I have removed the unused imports. There are some of them used in the file, though. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4084: Support distinct_count/dc in eventstats + +**URL:** https://github.com/opensearch-project/sql/pull/4084 + +**Author:** @ahkcs + +**Created:** 2025-08-20T16:38:08Z + +**State:** MERGED + +**Merged:** 2025-08-22T20:26:10Z + +**Changes:** +167 -24 (7 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR implements support for `distinct_count()` and `dc()` functions in the PPL `eventstats` command, enabling users to calculate distinct counts as window functions with optional partitioning. 
+ + ## Usage Examples + + ```sql + -- Basic distinct count + source=accounts | eventstats dc(state) as distinct_states + + -- Using distinct_count function name + source=accounts | eventstats distinct_count(country) as unique_countries + + -- With partitioning + source=accounts | eventstats dc(state) as state_count by gender +``` + + ## Query Translation + +``` + Input: source=accounts | eventstats dc(state) by gender + Translates to: APPROX_DISTINCT_COUNT(state) OVER (PARTITION BY gender) +``` + +### Related Issues +Resolves #4052 + +### Check List + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Could you add UT in `CalcitePPLEventstatsTest` as well? + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +### @yuancu - APPROVED + + +LGTM + + +### @dai-chen - APPROVED + + +Thanks for the changes! + +Please open a follow up PR to clarify the accuracy in documentation. Also, we can consider extracting the aggregation functions doc shared by both the stats and eventstats commands. + + +## Review Comments + + +### @dai-chen on `docs/user/ppl/cmd/eventstats.rst:None` + + +np: probably we can use one example here for both distinct_count and dc. + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Is `scalarWindowFunctionName` the rule for all window functions? + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Is this change still required if we add both to `scalarWindowFunctionName` in above comment? + + +### @ahkcs on `docs/user/ppl/cmd/eventstats.rst:None` + + +Updated + + +### @ahkcs on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Updated to use under `scalarWindowFunctionName` + + +### @ahkcs on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +Removed change + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +This was missing previously? 
How this DISTINCT_COUNT_APPROX function registered previously? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +It was previously registered here: OpenSearchExecutionEngine.java:281-287 +I added registration in PPLFuncImpTable for Unit Tests + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Is it registered twice? Should we remove it in OpenSearchExecutionEngine? cc: @yuancu + + +### @penghuo on `docs/user/ppl/cmd/eventstats.rst:None` + + +Add since 3.3 in doc + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Seems like removing it on `OpenSearchExecutionEngine` would cause integration test to fail + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Also I'm a little confused after another look at our `distinct_count` and `distinct_count_approx` function. Here we translate all to `distinct_count_approx` because Calcite window function doesn't support `COUNT(DISTINCT)`. Need to confirm if: + +1. There is inconsistency in stats with or without pushdown +2. We either clarify all distinct function estimated or avoid pushing down for accurate result as SPL's + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Hi @xinyual and @yuancu, + +Could you help confirm the current behavior of the `dc` (distinct count) function in the `stats` command under two scenarios: + +1. **Pushdown enabled** +2. **Pushdown disabled** + +Specifically: + +* Is the result of the `dc` function in `stats` exact or approximate in each case? +* For context, in the `eventstats` implementation, `dc` currently returns approximate results. I'd like to know if the same holds true for `stats` both with and without pushdown. + +Thanks! 
+ + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +@dai-chen Discussed with @xinyual , the primary reason that `DISTINCT_COUNT_APPROX` is registered in `OpenSearchExecutionEngine` is because it relies on a library from OpenSearch: `org.opensearch.search.aggregations.metrics.HyperLogLogPlusPlus`. PPL tries to not depend on any data engine specific implementation. Therefore, the function is registered externally in `OpenSearchExecutionEngine`. + +This discussion may be helpful: https://github.com/opensearch-project/sql/pull/3654#discussion_r2115053978 + + +For your questions: + +> There is inconsistency in stats with or without pushdown + +No, currently the `distinct_count_approx` in ppl uses the same implementation as in OpenSearch DSL (with `org.opensearch.search.aggregations.metrics.HyperLogLogPlusPlus`). + +> We either clarify all distinct function estimated or avoid pushing down for accurate result as SPL's + +The approximation behavior is described here: [Cardinality aggregations - OpenSearch Documentation](https://docs.opensearch.org/latest/aggregations/metric/cardinality/#controlling-precision). Although it should be fine if we leave out this description since it aligns with OpenSearch's behavior, it is indeed better to mention the approximated behavior in the document. + + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +@ahkcs I think you can use the existing implementation of `APPROX_COUNT_DISTINCT`, without the need to register a new one with Calcite's implementation, which may cause inconsistency with the pushed-down results. You can do so by simply removing this register statement. + + +### @yuancu on `integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json:4` + + +It's not pushed down. Reusing existing implementation may solve this. 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +@yuancu Thanks for the feedback! I tried removing the register statement as suggested, but it causes "Cannot resolve function: DISTINCT_COUNT_APPROX" errors during Calcite query planning, even though the OpenSearch implementation exists in OpenSearchExecutionEngine.java. +In my initial implementation, I didn't have this register statement but the added unit tests requires this registration + + +### @ahkcs on `integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json:4` + + +@yuancu You're right that it's not being pushed down. However, removing the registration in PPLFuncImpTable won't solve the push-down issue. The CalciteExplainIT goes through the external OpenSearch execution path and would use the existing implementation in `OpenSearchExecutionEngine.java` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Hmm... that's weird. After I removed the function you registered, I tried to run this query: `source=opensearch-sql_test_index_account | eventstats dc(age) as unique_age by state`. It gets executed without a problem. It resolves to the function that we registered externally. + +**Update**: I found that unit tests will indeed fail. This is because the function is registered externally when starting the cluster with OpenSearch engine, yet there isn't really any cluster spin up in the unit tests. + +I think we can let go of the unit tests? There is no need to introduce such duplication / inconsistency for the sake of logical plan validation -- you can validate it in explain IT instead. 
+ + +### @yuancu on `integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json:4` + + +`stats dc(field)` can be pushed down -- `source=opensearch-sql_test_index_account | stats dc(age) as unique_age` is explained as the following plan: + +``` +{ + "calcite": { + "logical": """LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{}], unique_age=[COUNT(DISTINCT $0)]) + LogicalProject(age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) +""", + "physical": """EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#22:LogicalAggregate.NONE.[](input=RelSubset#21,group={},unique_age=COUNT(DISTINCT $0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"unique_age":{"cardinality":{"field":"age"}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +""" + } +} +``` + +~~You can imitate how `stats dc(field)` is pushed down to enable the same for `eventstats dc(field)`.~~ + +**Update**: I found that all of the existing `eventstats agg_function(field)` are not pushed down. Maybe it's hard to push down window functions.. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Updated implementation to remove UT and registration + + +## General Comments + + +### @ahkcs + + +> Could you add UT in `CalcitePPLEventstatsTest` as well? + +Added UT + + +### @dai-chen + + +Hi @yuancu @ahkcs Based on my testing below, when `distinct_count` is not pushed down, Calcite executes it using its built-in implementation, which produces accurate results. When it is pushed down, it is translated into an OpenSearch cardinality aggregation, which returns approximate results. Is that correct? 
+ +| Command | **Without Pushdown** | **With Pushdown** | +|---|---|---| +| `stats distinct_count(field)` | **Exact** `COUNT(DISTINCT field)` computed in Calcite. | Uses `cardinality` aggregation: **close to accurate** if distinct ≤ `precision_threshold`, otherwise **approximate**. | +| `eventstats distinct_count(field)` | Currently uses the same HLL++ as cardinality aggregation via `distinct_count_approx` | N/A (Not clear how to push down window function yet) | + + +
    +# With pushdown
    +opensearchsql> explain source=accounts | stats distinct_count(age);
    +OpenSearchException({'calcite': {
    +'logical': 'LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n
    +  LogicalAggregate(group=[{}], distinct_count(age)=[COUNT(DISTINCT $0)])\n
    +    LogicalProject(age=[$2])\n
    +      CalciteLogicalIndexScan(table=[[OpenSearch, accounts]])\n',
    +'physical': 'EnumerableLimit(fetch=[10000])\n
    +  CalciteEnumerableIndexScan(table=[[OpenSearch, accounts]],
    +    PushDownContext=[[AGGREGATION->rel#362:LogicalAggregate.NONE.[](input=RelSubset#361,
    +      group={},distinct_count(age)=COUNT(DISTINCT $0))],
    +      OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m",
    +        "aggregations":{"distinct_count(age)":{"cardinality":{"field":"age"}}}},
    +        requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n'}})
    +
    +# Without pushdown
    +opensearchsql> explain source=accounts | stats distinct_count(age);
    +OpenSearchException({'calcite': {
    +'logical': 'LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n
    +  LogicalAggregate(group=[{}], distinct_count(age)=[COUNT(DISTINCT $0)])\n
    +    LogicalProject(age=[$2])\n
    +      CalciteLogicalIndexScan(table=[[OpenSearch, accounts]])\n',
    +'physical': 'EnumerableLimit(fetch=[10000])\n
    +  EnumerableAggregate(group=[{}], distinct_count(age)=[COUNT($0)])\n
    +    EnumerableAggregate(group=[{2}])\n
    +      CalciteEnumerableIndexScan(table=[[OpenSearch, accounts]])\n'}})
    +
    + +If there is inconsistency, I'm thinking the options may include: + +1. **Keep current implementation**: Document that distinct_count / dc return exact (or close-to-exact) results for small cardinalities (≤ 3k by default) and approximate results above that. Ref: https://docs.opensearch.org/latest/aggregations/metric/cardinality/#controlling-precision +2. **Use accurate count function consistently**: Always compute exact distinct counts, either by reusing Calcite’s built-in implementation or by registering a new function. Document that this may lead to heavy resource usage; we would also need to benchmark the performance. + +cc: @penghuo @LantaoJin + + +### @penghuo + + +> Keep current implementation: Document that distinct_count / dc return exact (or close-to-exact) results for small cardinalities (≤ 3k by default) and approximate results above that. Ref: https://docs.opensearch.org/latest/aggregations/metric/cardinality/#controlling-precision + ++1, agree, it is a short-term solution; dc/est_dc are the same when pushed down to OpenSearch. 
+ + +--- + +# PR #4083: [Feature] Implementation of `regex` Command In PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4083 + +**Author:** @RyanL1997 + +**Created:** 2025-08-20T08:22:08Z + +**State:** MERGED + +**Merged:** 2025-08-30T03:02:47Z + +**Changes:** +945 -25 (26 files) + +**Labels:** `PPL`, `feature`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + Implementation of `regex` Command In PPL + +### Details: +- Core logic of `regex` +```bash + | [commands] | regex = | [commands] + | [commands] | regex != | [commands] +``` + +- field: The field name to apply regex matching against +- pattern: Java regex pattern string (supports standard regex metacharacters) +- `!=`: Negated matching - returns records that do NOT match the pattern +- `=`: Positive matching - returns records that match the pattern + +For the handling of default field (`_source` / `_raw`): +- Tracking at issue https://github.com/opensearch-project/sql/issues/4111 + +### Related Issues +* Relate https://github.com/opensearch-project/sql/issues/4082 + + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +This method seems unused + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +If we are going to use this operator, I think we don't need another alias + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +good catch, this was some left over from my original poc. Fixed. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +fixed + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Regex.java:None` + + +Should we define constants for `!=` and `=`? I see multiple use of them. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/operator/predicate/RegexMatch.java:None` + + +Should we eliminate other product name? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/operator/predicate/RegexMatch.java:None` + + +Could this actually happen? In that case, is this behavior align with requirements? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/operator/predicate/RegexMatch.java:None` + + +Should `negated` affect this result? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +I think we should use LinkedHashMap to remove oldest one. (This implementation remove everything once reached max size.) + +We might be able to use LRUMap from Apache commons. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexExpression.java:None` + + +nit: Let's don't put redundant comment. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexExpression.java:None` + + +Why do we need to delegate? 
Should we directly call the delegated method from the class which calls this method? + + +### @ykmr1224 on `docs/user/ppl/cmd/regex.rst:2` + + +Let's note supported version. + + +### @ykmr1224 on `docs/user/ppl/cmd/regex.rst:None` + + +Can we clarify about `default field`? + + +### @ykmr1224 on `docs/user/ppl/cmd/regex.rst:None` + + +Does `by default` indicate it can be changed? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/ast/tree/Regex.java:None` + + +fixed + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/operator/predicate/RegexMatch.java:None` + + +good catch, fixed + + +### @RyanL1997 on `docs/user/ppl/cmd/regex.rst:None` + + +This is a good question, since we are having this tracking issue (https://github.com/opensearch-project/sql/issues/4111) to track the default field's on going design, do you think we should mention the current limitation in the doc just for now? The current limitation is that "for rex and regex, user have to specified a field". cc @penghuo + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/operator/predicate/RegexMatch.java:None` + + +Nice catch, I rechecked the logic: +- The field null/missing checks are necessary and correct - they filter out documents that don't have the field being searched, which is standard regex filtering behavior and throw an exception until #4111 is implemented. 
+```bash +curl -X POST "localhost:9200/_plugins/_ppl" -H 'Content-Type: application/json' -d'{"query": "source=accounts | regex \"*.on$\" "}' | jq + + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 219 100 172 100 47 257 70 --:--:-- --:--:-- --:--:-- 327 +{ + "error": { + "reason": "Invalid Query", + "details": "Regex command requires a field to be specified", + "type": "IllegalArgumentException" + }, + "status": 400 +} +``` + +- Removed patternValue.isNull() and patternValue.isMissing() checks since patterns are always literal strings in the current implementation +- Kept only the field null/missing checks which are necessary and correct + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/operator/predicate/RegexMatch.java:None` + + +The field validation happens at query analysis time before our code executes, which catches non-existent fields early. Our null/missing checks handle the runtime cases where fields exist in the schema but have null/missing values in specific documents. + +So, negation correctly doesn't affect this behavior - null/missing values are filtered out in both regular and negated regex, which aligns with standard filtering semantics. 
+ +```bash +# Case 1: = +❯ curl -X POST "localhost:9200/_plugins/_ppl" \ + -H 'Content-Type: application/json' \ + -d'{"query": "source=accounts | regex middlename=\".*\" "}' | jq + +{ + "error": { + "reason": "Invalid Query", + "details": "field [middlename] not found; input fields are: [account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _id, _index, _score, _maxscore, _sort, _routing]", + "type": "IllegalArgumentException" + }, + "status": 400 +} + +# Case 2: != +❯ curl -X POST "localhost:9200/_plugins/_ppl" \ + -H 'Content-Type: application/json' \ + -d'{"query": "source=accounts | regex middlename!=\".*\" "}' | jq + +{ + "error": { + "reason": "Invalid Query", + "details": "field [middlename] not found; input fields are: [account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _id, _index, _score, _maxscore, _sort, _routing]", + "type": "IllegalArgumentException" + }, + "status": 400 +} +``` + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:None` + + +Thanks for pointing it out. Implemented proper LRU cache using `LinkedHashMap` with access-order tracking and automatic eviction of oldest entries. This replaces the previous 'clear everything' approach with efficient removal of only the least recently used patterns when cache limit is reached. The implementation is thread-safe using `Collections.synchronizedMap`. Could you take a look again? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexExpression.java:None` + + +fixed. + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexExpression.java:None` + + +Good catch. Removed the unnecessary delegation layer. `ParseUtils` now calls `RegexCommonUtils.getNamedGroupCandidates()` directly instead of going through `RegexExpression.getNamedGroupCandidates()`. 
This eliminates the wrapper method that added no value and simplifies the call chain. + + +### @RyanL1997 on `docs/user/ppl/cmd/regex.rst:None` + + +I've updated the documentation to be more accurate. The case sensitivity can be changed using Java's built-in inline flags like `(?i)` for case-insensitive matching, `(?m)` for multiline mode, `(?s)` for dotall mode, etc. This isn't something we implemented ourselves - it's standard Java regex functionality that we inherit through `Pattern.compile()` + + I've updated the documentation to: + - Remove the confusing 'by default' language + - List common inline flags with examples + - Add a reference to the official Java Pattern documentation (https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html) for complete coverage of all available modes and features + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Regex.java:None` + + +nit: Let's remove redundant comments (I see many). Those would be visual noise for reader, and could waste tokens for LLMs. I'd recommend to add common instruction for LLM to avoid unneeded comments. + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java:20` + + +Let's add unit tests. + + +### @ykmr1224 on `docs/user/ppl/cmd/regex.rst:None` + + +If it is mandatory now, let's describe it as mandatory. We should update it once we made it optional. + + +### @ykmr1224 on `docs/user/ppl/cmd/regex.rst:None` + + +Do we need to note this here? I don't see much risk to just cache 1000 regex patterns. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRegexCommandIT.java:None` + + +Is it meant to execute test defined in RegexCommandIT? It looks confusing to me. + + +### @ykmr1224 on `integ-test/src/test/java/org/opensearch/sql/ppl/RegexCommandIT.java:None` + + +Is it intentionally enabled here as well? 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Regex.java:None` + + +Should we receive `nagated` as a parameter? (Why do we need to convert from String here?) + + +## General Comments + + +### @ahkcs + + +Could you add `CrossClusterSearchIT` and `PPLQueryDataAnonymizerTest` as well? + + +### @RyanL1997 + + +> Could you add CrossClusterSearchIT and PPLQueryDataAnonymizerTest as well? + +~~Will add unit test and these test soon since i would like to have the general core logic to be reviewed first.~~ + +Added. + + +### @RyanL1997 + + +Will manual backport it. + + +### @ykmr1224 + + +Can you do manual backport or see what is conflicting? +It is blocking https://github.com/opensearch-project/sql/pull/4214 + + +--- + +# PR #4081: [Backport 2.19-dev] Support pushdown dedup with Calcite (#3972) + +**URL:** https://github.com/opensearch-project/sql/pull/4081 + +**Author:** @LantaoJin + +**Created:** 2025-08-20T07:32:12Z + +**State:** MERGED + +**Merged:** 2025-08-21T02:04:04Z + +**Changes:** +455 -54 (23 files) + + +## Description + +(cherry picked from #3972 commit f358e5d619a8b997306e3d31b2be0a78586d4e8a) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4080: [Backport 2.19-dev] Push down IP comparison as range query with Calcite (#3959) + +**URL:** https://github.com/opensearch-project/sql/pull/4080 + +**Author:** @yuancu + +**Created:** 2025-08-20T06:08:49Z + +**State:** MERGED + +**Merged:** 2025-08-20T09:41:26Z + +**Changes:** +162 -41 (11 files) + + +## Description + +### Description + +Backport #3959 to 2.19-dev + +### Commit Message +* Add reverse op for compare ip to support pushdown + + + +* Pushdown ip comparison + + + +* Refactor CompareIpFunction to use SqlKind directly + + + +* Simplify the overriding of reverse() for IP comparators + + + +--------- + + +(cherry picked from commit 
e2375febc03ac6788ad8a7c22e7818148da532f0) + +### Related Issues +Resolves #3937 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4077: Fix CI failure because of plan having changed + +**URL:** https://github.com/opensearch-project/sql/pull/4077 + +**Author:** @qianheng-aws + +**Created:** 2025-08-20T02:17:13Z + +**State:** MERGED + +**Merged:** 2025-08-20T04:02:52Z + +**Changes:** +1 -2 (1 files) + +**Labels:** `bug`, `backport 2.19-dev` + + +## Description + +### Description +Fix because the plan of test case `testSkipScriptEncodingOnExtendedFormat` changed from `LIMIT->CALC->SCAN` to `SCAN` + +### Related Issues + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4075: Add example for String concat in eval.rst + +**URL:** https://github.com/opensearch-project/sql/pull/4075 + +**Author:** @ahkcs + +**Created:** 2025-08-19T19:45:39Z + +**State:** MERGED + +**Merged:** 2025-08-20T00:21:37Z + +**Changes:** +35 -0 (1 files) + +**Labels:** `documentation` + + +## Description + +Modified eval.rst file to add an example for String concatenation + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Could you check why many CI failures? + + +### @Swiddis - CHANGES_REQUESTED + + +Fails doctest + +thought (future,non-blocking): We could probably stand to separate doctest into its own action? It takes a while so we'd benefit from running it in parallel, and it'd make these failures more obvious. + +Also, the CLI rewrite might end up messing with doctest, so it may be a good investment anyway 😓 + + +### @Swiddis - APPROVED + + +Can auto-merge on passing CI + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +> Could you check why many CI failures? + +Aaron was tracking the CI failure, it's caused by https://github.com/opensearch-project/sql/pull/4010 + + +--- + +# PR #4074: [Backport 2.19-dev] Enhance sort command in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4074 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-19T16:17:31Z + +**State:** MERGED + +**Merged:** 2025-08-19T20:35:15Z + +**Changes:** +495 -34 (33 files) + + +## Description + +Backport c8d2694e0cde707ab06e5e0197298cc348c89b14 from #3934. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4073: Add new command checklist doc + +**URL:** https://github.com/opensearch-project/sql/pull/4073 + +**Author:** @LantaoJin + +**Created:** 2025-08-19T09:48:15Z + +**State:** MERGED + +**Merged:** 2025-08-20T18:18:36Z + +**Changes:** +52 -1 (2 files) + +**Labels:** `documentation` + + +## Description + +### Description +Add new command checklist doc and add a item in PR checklist + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `DEVELOPER_GUIDE.rst:225` + + +It is general guidance for all new features (command, function, enhancement), Do we need to highlight in new command checklist? + + +### @yuancu on `DEVELOPER_GUIDE.rst:225` + + +I'll write a separate section called *New PPL Function Checklist* + + +## General Comments + + +### @LantaoJin + + +cc @yuancu @qianheng-aws + + +--- + +# PR #4072: [Backport 2.19-dev] Support script push down on text field + +**URL:** https://github.com/opensearch-project/sql/pull/4072 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-19T09:14:12Z + +**State:** MERGED + +**Merged:** 2025-08-21T01:57:06Z + +**Changes:** +203 -85 (11 files) + + +## Description + +Backport bb1a644cb089d7bb9a613de0ee6b8b6a5bfe6f22 from #4010. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +@qianheng-aws IT failed. + + +### @qianheng-aws + + +CI failed because the `fieldReader` in `SourceLookUp` is not set correctly, due to https://github.com/opensearch-project/OpenSearch/pull/17927 isn't backport to 2.19 + +~~Therefore, won't backport this PR to 2.19 as well.~~ + +Find a way to set document and segment to the SourceLook by ourself. + + +--- + +# PR #4071: Support pushdown sort by simple expressions + +**URL:** https://github.com/opensearch-project/sql/pull/4071 + +**Author:** @songkant-aws + +**Created:** 2025-08-19T09:07:15Z + +**State:** MERGED + +**Merged:** 2025-09-04T08:35:38Z + +**Changes:** +860 -1 (12 files) + +**Labels:** `enhancement` + + +## Description + +### Description +This PR aims to resolve simple sort expression pushdown problem without prerequisite of project pushdown optimization. + +A PPL query may contain a sort over projected trivial expression like: `source = test | eval b = a + 1 | sort b` + +This optimization PR will optimize this PPL into `source = test | sort a | eval b = a + 1` so that the problem is translated to pushdown field sort on column a. Also, sorting by field is supposed to be faster than sorting by script considering OpenSearch internal optimization. + +### Related Issues +Resolves #3990 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - COMMENTED + + +It seems that when the sorted field is not projected in the final output, there will be an added sort at the top. + +E.g. 
+ +- `source=opensearch-sql_test_index_account | eval b = balance + 1 | sort b | fields b`: + ``` + EnumerableCalc(expr#0=[{inputs}], expr#1=[1], expr#2=[+($t0, $t1)], balance=[$t0], $f1=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[SORT->[{ + "balance" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000, PROJECT->[balance]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]},"sort":[{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + ``` + +- but for `source=opensearch-sql_test_index_account | eval b = balance + 1 | sort b | fields b` + ``` + EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0=[{inputs}], expr#1=[1], expr#2=[+($t0, $t1)], $f0=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance], SORT->[{ + "balance" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]},"sort":[{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + ``` + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/SortProjectExprTransposeRule.java:None` + + +I'm curious when will their conventions be different. From the entrance rule, it seems they will always be `LogicalSort` and `LogicalProject` with `NONE` convention. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/SortProjectExprTransposeRule.java:None` + + +For this case, it should be the same. The original rule targets to optimize both logical and physical plans. 
So I used this check but forgot to remove it. I can remove it once I verify it's not needed. + + +### @yuancu on `opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java:229` + + +A converse test like the following may help: + +```java +// Cast from high precision to low precision +srcType = typeFactory.createSqlType(SqlTypeName.DECIMAL); +srcInput = rexBuilder.makeInputRef(srcType, 1); +dstType = + typeFactory.createSqlType( + SqlTypeName.DECIMAL, srcType.getPrecision() - 4, srcType.getScale()); +cast = rexBuilder.makeCast(dstType, srcInput); +result = OpenSearchRelOptUtil.getOrderEquivalentInputInfo(cast); +assertFalse(result.isPresent()); +``` + + +## General Comments + + +### @songkant-aws + + +> It seems that when the sorted field is not projected in the final output, there will be an added sort at the top. +> +> E.g. +> +> * `source=opensearch-sql_test_index_account | eval b = balance + 1 | sort b | fields b`: +> ``` +> EnumerableCalc(expr#0=[{inputs}], expr#1=[1], expr#2=[+($t0, $t1)], balance=[$t0], $f1=[$t2]) +> CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[SORT->[{ +> "balance" : { +> "order" : "asc", +> "missing" : "_first" +> } +> }], LIMIT->10000, PROJECT->[balance]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]},"sort":[{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) +> ``` +> * but for `source=opensearch-sql_test_index_account | eval b = balance + 1 | sort b | fields b` +> ``` +> EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) +> EnumerableCalc(expr#0=[{inputs}], expr#1=[1], expr#2=[+($t0, $t1)], $f0=[$t2]) +> CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance], SORT->[{ +> "balance" : { +> "order" : "asc", +> "missing" : "_first" +> } +> }], 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]},"sort":[{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) +> ``` + +Nice catch! It happens when project doesn't carry the equivalent input field in its project node expressions. In `ExpandCollationOnProjectExprRule`, I use Project itself collation to check if it satisfies the target collation(aka top sort collation). But since Project has only trivial simple expression output, it will contain empty RelCollation []. So in this case, the rule will skip due to the empty fromCollation. + +I need to check if it's safe to use Project.getInput() collation instead of collations on Project itself. The problem should be solved with that fix. + + +--- + +# PR #4068: [Backport 2.19-dev] Fix PPL eval command string concatenation with + operator + +**URL:** https://github.com/opensearch-project/sql/pull/4068 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-19T07:12:14Z + +**State:** MERGED + +**Merged:** 2025-08-19T20:34:52Z + +**Changes:** +119 -1 (2 files) + + +## Description + +Backport f40d8708df976c65b63802039fd9bdb0245d1959 from #4020. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4067: [2.19-dev] CVE-2025-8916: upgrade bcprov-jdk18on to 1.79 + +**URL:** https://github.com/opensearch-project/sql/pull/4067 + +**Author:** @LantaoJin + +**Created:** 2025-08-19T06:58:31Z + +**State:** MERGED + +**Merged:** 2025-08-20T18:17:52Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `security fix` + + +## Description + +### Description +[CVE-2025-8916](https://www.mend.io/vulnerability-database/CVE-2025-8916): upgrade bcprov-jdk18on to 1.79 + + +### Related Issues +Resolves CVE-2025-8916 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4064: [Backport 2.19-dev] Allow equal expression as a function argument + +**URL:** https://github.com/opensearch-project/sql/pull/4064 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-19T06:18:15Z + +**State:** MERGED + +**Merged:** 2025-08-20T05:44:37Z + +**Changes:** +24 -4 (3 files) + + +## Description + +Backport 9f2b19aae924545f105812d946ac86df8da95aaf from #4001. + + + +## Reviews + + +### @LantaoJin - APPROVED + + +Yes. 
The flaky will be fixed in my other PR + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @yuancu + + +There's a failed test: + +``` +org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT > testQ7 FAILED + java.net.SocketTimeoutException: 60 000 milliseconds timeout on connection http-outgoing-469 [ACTIVE] + at __randomizedtesting.SeedInfo.seed([533FB44206321030:8473C8A764182F94]:0) + at org.opensearch.client.RestClient.extractAndWrapCause(RestClient.java:1230) + at org.opensearch.client.RestClient.performRequest(RestClient.java:358) + at org.opensearch.client.RestClient.performRequest(RestClient.java:361) + at org.opensearch.client.RestClient.performRequest(RestClient.java:346) + at org.opensearch.sql.ppl.PPLIntegTestCase.executeQueryToString(PPLIntegTestCase.java:48) + at org.opensearch.sql.ppl.PPLIntegTestCase.executeQuery(PPLIntegTestCase.java:44) + at org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT.testQ7(CalcitePPLTpchIT.java:178) + + Caused by: + java.net.SocketTimeoutException: 60 000 milliseconds timeout on connection http-outgoing-469 [ACTIVE] + at org.apache.http.nio.protocol.HttpAsyncRequestExecutor.timeout(HttpAsyncRequestExecutor.java:387) + at org.apache.http.impl.nio.client.InternalIODispatch.onTimeout(InternalIODispatch.java:98) + at org.apache.http.impl.nio.client.InternalIODispatch.onTimeout(InternalIODispatch.java:40) + at org.apache.http.impl.nio.reactor.AbstractIODispatch.timeout(AbstractIODispatch.java:175) + at org.apache.http.impl.nio.reactor.BaseIOReactor.sessionTimedOut(BaseIOReactor.java:261) + at org.apache.http.impl.nio.reactor.AbstractIOReactor.timeoutCheck(AbstractIOReactor.java:506) + at org.apache.http.impl.nio.reactor.BaseIOReactor.validate(BaseIOReactor.java:211) + at org.apache.http.impl.nio.reactor.AbstractIOReactor.execute(AbstractIOReactor.java:280) + at org.apache.http.impl.nio.reactor.BaseIOReactor.execute(BaseIOReactor.java:104) + at 
org.apache.http.impl.nio.reactor.AbstractMultiworkerIOReactor$Worker.run(AbstractMultiworkerIOReactor.java:591) + at java.base/java.lang.Thread.run(Thread.java:829) +``` + +Can this be safely viewed as flaky? @LantaoJin + + +--- + +# PR #4062: [Backport 2.19-dev] Fix DOUBLE to STRING cast rendering zero values in scientific notation (#3982) + +**URL:** https://github.com/opensearch-project/sql/pull/4062 + +**Author:** @yuancu + +**Created:** 2025-08-19T03:24:55Z + +**State:** MERGED + +**Merged:** 2025-08-19T06:08:44Z + +**Changes:** +99 -5 (4 files) + +**Labels:** `bug` + + +## Description + +### Description + +Backport #3982 to 2.19-dev + +### Commit Message + +* Fix casting double 0.0 to string + + + +* Fix float to string casting precision lost with custom FormatNumberFunction + +This commit fixes float to string casting by replacing the use of SqlLibraryOperators.FORMAT_NUMBER with a custom FormatNumberFunction implementation. The new implementation converts the number to a BigDecimal before formatting to preserve precision and avoid issues like 6.2 becoming 6.199999809265137. + + + +* Simplify the implementation of fp number to string cast + + + +* Update implementation of NumberToStringFunction + + + +* Cast decimal with NUMBER_TO_STRING function + + + +* Test cast decimal + + + +--------- + + +(cherry picked from commit 19770838fb11738e85d75f39d99365276c9b5c8b) + +### Related Issues +Resolves #3947 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4060: Add flaky retry on CalcitePPLTpchIT + +**URL:** https://github.com/opensearch-project/sql/pull/4060 + +**Author:** @LantaoJin + +**Created:** 2025-08-19T03:21:35Z + +**State:** MERGED + +**Merged:** 2025-08-21T00:27:22Z + +**Changes:** +71 -1 (4 files) + +**Labels:** `flaky-test`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +CalcitePPLTpchIT.testQ7 failed with socket timeout sometimes on mac/windows instance. +q1 could be flaky when pushdown enabled since the precision of double sum depended on order of data. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +## Review Comments + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/util/Retry.java:None` + + +Is it easier to make this also support class level? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/util/RetryProcessor.java:None` + + +remove or logging? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/util/RetryProcessor.java:15` + + +np: could you add missing Javadoc? 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/util/RetryProcessor.java:15` + + +added + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/util/RetryProcessor.java:None` + + +changed to logging + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/util/Retry.java:None` + + +done + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4059: [Backport 2.19-dev] Add sum and avg functions in eval + +**URL:** https://github.com/opensearch-project/sql/pull/4059 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-18T23:47:50Z + +**State:** MERGED + +**Merged:** 2025-08-19T03:55:34Z + +**Changes:** +596 -6 (6 files) + + +## Description + +Backport 8368e608eec6c917782bb27f5e8f7a4b9afbdad9 from #3986. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4055: Filter expression with `OR isnull(x)` should be pushed down + +**URL:** https://github.com/opensearch-project/sql/pull/4055 + +**Author:** @LantaoJin + +**Created:** 2025-08-18T09:00:50Z + +**State:** MERGED + +**Merged:** 2025-08-20T18:16:22Z + +**Changes:** +104 -4 (10 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Filter expression with `OR isnull(x)` should be pushed down in Calcite engine +Example: +``` +| where firstname = 'Amber' or isnull(gender) +``` +Should be pushed down to +``` + "query": { + "bool": { + "should": [ + { + "bool": { + "must_not": [ + { + "exists": { + "field": "gender", + "boost": 1 + } + } + ], + "adjust_pure_negative": true, + "boost": 1 + } + }, + { + "term": { + "firstname.keyword": { + "value": "Amber", + "boost": 1 + } + } + } + ], + "adjust_pure_negative": true, + "boost": 1 + } + } +``` +### Related Issues +Resolves #4046 + +### Check List 
+By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:643` + + +Shall we check other functions as well? e.g. `LENGTH` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:643` + + +Function LENGTH support accepting NULL as its arguments. No need to change. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4054: [Backport 2.19-dev] Implement type checking for aggregation functions with Calcite (#4024) + +**URL:** https://github.com/opensearch-project/sql/pull/4054 + +**Author:** @yuancu + +**Created:** 2025-08-18T03:23:35Z + +**State:** MERGED + +**Merged:** 2025-08-18T17:12:25Z + +**Changes:** +404 -318 (5 files) + + +## Description + +### Description + +Backport #4024 to 2.19-dev + +### Commit Message + +* Remove getTypeChecker from FunctionImp interface + + + +* Refactor registerExternalFunction to registerExternalOperator + + + +* Do not register GEOIP function if got incompatible client + + + +* Create scaffold for type checking of aggregation functions + + + +* Add type checkers for aggregation functions + + + +* Test type checking for aggregation functions + + + +--------- + + +(cherry picked from commit d758163cb9087d23a4c96694c6af47818532001f) + + +### Related Issues +Resolves #4000 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4045: Adding new maintainer @yuancu + +**URL:** https://github.com/opensearch-project/sql/pull/4045 + +**Author:** @LantaoJin + +**Created:** 2025-08-15T07:50:03Z + +**State:** MERGED + +**Merged:** 2025-08-20T18:18:26Z + +**Changes:** +5 -3 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Welcoming new maintainer @yuancu (Yuanchun Shen) + +### Related Issues +Resolves https://github.com/opensearch-project/.github/issues/374 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +Welcome :) + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Do not merge until https://github.com/opensearch-project/.github/issues/374 is processed.
cc @peterzhuamazon + + +--- + +# PR #4044: Allow type checkering on nested & struct fields for isnull, isnotnull, and ispresent + +**URL:** https://github.com/opensearch-project/sql/pull/4044 + +**Author:** @yuancu + +**Created:** 2025-08-15T06:21:11Z + +**State:** MERGED + +**Merged:** 2025-08-22T09:16:34Z + +**Changes:** +249 -116 (6 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Update the type checkers of isnull, isnotnull, and ispresent to support null checking of struct types (and possibly map types). + +It is worth mentioning that the pushdown behavior of `isnull(nested_field)` differs from its expected behavior: `isnull` with non-empty nested field should return `false`. For example, for the query `source=nested_simple | where isnull(address)`, where `adress` is of type `nested`, the generated DSL is + +```json +{ + "from": 0, + "size": 10000, + "timeout": "1m", + "query": { + "bool": { + "must_not": [ + { + "exists": { + "field": "address", + "boost": 1 + } + } + ], + "adjust_pure_negative": true, + "boost": 1 + } + }, + "_source": { + "includes": [ + "name", + "address", + "id", + "age" + ], + "excludes": [] + }, + "sort": [ + { + "_doc": { + "order": "asc" + } + } + ] +} +``` + +The DSL says it will fetch rows where `address` *must not exist*. However, it fetches rows where the address is **NOT** null: + +
    +Execution result + +```json +{ + "schema": [ + { + "name": "name", + "type": "string" + }, + { + "name": "address", + "type": "array" + }, + { + "name": "id", + "type": "bigint" + }, + { + "name": "age", + "type": "bigint" + } + ], + "datarows": [ + [ + "abbas", + [ + { + "city": "New york city", + "moveInDate": { + "dateAndTime": "1984-04-12 09:07:42" + }, + "state": "NY" + }, + { + "city": "bellevue", + "moveInDate": [ + { + "dateAndTime": "2023-05-03 08:07:42" + }, + { + "dateAndTime": "2001-11-11 04:07:44" + } + ], + "state": "WA" + }, + { + "city": "seattle", + "moveInDate": { + "dateAndTime": "1966-03-19 03:04:55" + }, + "state": "WA" + }, + { + "city": "chicago", + "moveInDate": { + "dateAndTime": "2011-06-01 01:01:42" + }, + "state": "IL" + } + ], + null, + 24 + ], + [ + "chen", + [ + { + "city": "Miami", + "moveInDate": { + "dateAndTime": "1901-08-11 04:03:33" + }, + "state": "Florida" + }, + { + "city": "los angeles", + "moveInDate": { + "dateAndTime": "2023-05-03 08:07:42" + }, + "state": "CA" + } + ], + null, + 32 + ], + [ + "peng", + [ + { + "city": "san diego", + "moveInDate": { + "dateAndTime": "2001-11-11 04:07:44" + }, + "state": "CA" + }, + { + "city": "austin", + "moveInDate": { + "dateAndTime": "1977-07-13 09:04:41" + }, + "state": "TX" + } + ], + null, + 26 + ], + [ + "andy", + [ + { + "city": "houston", + "moveInDate": { + "dateAndTime": "1933-12-12 05:05:45" + }, + "state": "TX" + } + ], + 4, + 19 + ], + [ + "david", + [ + { + "city": "raleigh", + "moveInDate": { + "dateAndTime": "1909-06-17 01:04:21" + }, + "state": "NC" + }, + { + "city": "charlotte", + "moveInDate": [ + { + "dateAndTime": "2001-11-11 04:07:44" + } + ], + "state": "SC" + } + ], + null, + 25 + ] + ], + "total": 5, + "size": 5 +} +``` + +
    + + +Therefore, this PR also prevents `isnull(nested_field)` to be pushed down. + +### Related Issues +Resolves #4004 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexScanRule.java:None` + + +seems this checker only cover a simple Filter such as `where isnull(a.b)`, rather than a complex filter expression such as `where a.b = 'xx' and isnull(a.c)` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexScanRule.java:None` + + +Thanks for reminding! I'll double check. + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexScanRule.java:None` + + +Fixed + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +How about adding a new method like `registerOperator(FUNC_NAME, OPERATOR, TYPE_CHECKER)` to simplify this? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Did not do so since I worried that it might mix levels of abstraction. Yet having such a method does simplify many duplicated snippets. Updated the implementation. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4041: PPL Coalesce Function Enhancement for Calcite - Advanced Type Handling and Field Support + +**URL:** https://github.com/opensearch-project/sql/pull/4041 + +**Author:** @aalva500-prog + +**Created:** 2025-08-14T22:22:56Z + +**State:** MERGED + +**Merged:** 2025-08-28T21:05:25Z + +**Changes:** +814 -24 (11 files) + +**Labels:** `enhancement`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +# PPL Coalesce Function Enhancement for Calcite - Advanced Type Handling and Field Support + +## Description + +This PR implements significant enhancements to the existing PPL `coalesce` function in Calcite to address current limitations with strict type checking and field requirements. The enhancement transforms coalesce from a restrictive function that requires same-type arguments and existing fields to a flexible function that handles mixed data types, non-existent fields, and empty values gracefully. 
+ +### Features Implemented + +**✅ Feature 1: Automatic Type Coercion** +- Removed strict same-type requirement for all arguments +- Implemented intelligent type coercion for mixed data types +- Example: `source=data | eval result = coalesce(field1, 123, "fallback")` + +**✅ Feature 2: Non-existent Field Support** +- Added support for dummy/non-existent fields in coalesce arguments +- Graceful handling of missing fields without runtime errors +- Example: `source=firewall | eval src_ip = coalesce(src_ip, sourceip, dummy_field, "unknown")` + +**✅ Feature 3: Empty Field Handling** +- Support for empty strings ("") and whitespace strings (" ") as valid values +- Empty strings are treated as valid non-null values, not skipped +- Example: `source=accounts | eval result = coalesce("", firstname)` returns "" + +## Technical Implementation + +### Calcite Changes +- Updated coalesce function to support mixed data types with automatic type coercion +- Enhanced field resolution to handle non-existent fields without errors +- Modified value processing to treat empty strings as valid non-null values + +## Usage Examples + +```ppl +# Feature 1: Automatic Type Coercion +source=data | eval result = coalesce(field1, 123, "fallback") +source=logs | eval value = coalesce(numeric_field, "0", 999) + +# Feature 2: Non-existent Field Support +source=firewall | eval src_ip = coalesce(src_ip, sourceip, dummy_field, "unknown") +source=logs | eval ip = coalesce(clientip, ipaddress, non_existent_field, "127.0.0.1") + +# Feature 3: Empty Field Handling +source=accounts | eval result = coalesce("", firstname) # Returns "" +source=accounts | eval result = coalesce(" ", firstname) # Returns " " +source=accounts | eval result = coalesce(firstname, "") # Returns firstname or "" + +# Combined Features Example +source=firewall | eval src_ip = coalesce(src_ip, sourceip, dummy_field, "unknown") # Uses all 3 features +``` +## Testing + +### Testing Plan +- **Type Coercion**: Test mixed data type handling 
and automatic conversions +- **Non-existent Fields**: Test graceful handling of missing/dummy fields +- **Empty Values**: Test empty string and whitespace processing +- **Backward Compatibility**: Verify existing queries continue to work +- **Performance**: Validate no significant performance degradation + +## Performance Impact + +Type coercion and field resolution changes may have minimal performance overhead. Performance testing will be conducted with big5 dataset to ensure acceptable impact. + +## Breaking Changes + +**None** - This PR maintains full backward compatibility while adding enhanced functionality. + +## Functionality Improvements + +| Feature | Before Enhancement | After Enhancement | +|---------|-------------------|-------------------| +| Mixed Types | ❌ Runtime Error | ✅ Automatic Coercion | +| Non-existent Fields | ❌ Runtime Error | ✅ Graceful Handling | +| Empty Strings | ❌ Not Supported | ✅ Treated as Valid | +| Type Safety | ✅ Strict Checking | ✅ Intelligent Coercion | + +## Documentation + +PPL command documentation is updated in this PR to reflect the new coalesce behavior including type coercion, non-existent field handling, and empty value support. 
+ +## Best Practices + +- **Field Ordering**: Place most specific/reliable fields first in coalesce arguments +- **Type Consistency**: While mixed types are supported, prefer consistent types when possible +- **Fallback Values**: Always include a literal fallback value as the last argument +- **Performance**: Use coalesce early in the query pipeline for optimal performance + +## BIG5 COALESCE PERFORMANCE TEST +Dataset: big5 +Test Date: Tue Aug 19 2025 + +### Baseline: +`source=big5 | head ` +head 10: Avg = 19ms, P90 = 25ms +head 100: Avg = 32ms, P90 = 38ms +head 1K: Avg = 123ms, P90 = 124ms +head 10K: Avg = 1170ms, P90 = 1222ms +head 100K: Avg = 1109ms, P90 = 1134ms + +`source=big5 | eval result = coalesce(`host.name`, 'unknown') | head ` +head 10: Avg = 37ms, P90 = 40ms +head 100: Avg = 40ms, P90 = 44ms +head 1K: Avg = 147ms, P90 = 164ms +head 10K: Avg = 1167ms, P90 = 1177ms +head 100K: Avg = 1160ms, P90 = 1196ms + +`source=big5 | eval result = coalesce(`host.name`, `agent.name`, 'unknown') | head ` +head 10: Avg = 29ms, P90 = 33ms +head 100: Avg = 46ms, P90 = 49ms +head 1K: Avg = 157ms, P90 = 199ms +head 10K: Avg = 1194ms, P90 = 1221ms +head 100K: Avg = 1166ms, P90 = 1203ms + +`source=big5 | eval result = coalesce(`host.name`, 'unknown') | fields result, `host.name` | head ` +head 10: Avg = 25ms, P90 = 27ms +head 100: Avg = 28ms, P90 = 28ms +head 1K: Avg = 56ms, P90 = 57ms +head 10K: Avg = 367ms, P90 = 391ms +head 100K: Avg = 347ms, P90 = 354ms + +### Feature testing: +`source=big5 | eval result = coalesce(`host.name`, 123, 'fallback') | head ` +head 10: Avg = 33ms, P90 = 37ms +head 100: Avg = 48ms, P90 = 52ms +head 1K: Avg = 166ms, P90 = 196ms +head 10K: Avg = 1138ms, P90 = 1163ms +head 100K: Avg = 1181ms, P90 = 1206ms + +`source=big5 | eval result = coalesce(`host.name`, dummy_field, 'unknown') | head ` +head 10: Avg = 32ms, P90 = 38ms +head 100: Avg = 40ms, P90 = 43ms +head 1K: Avg = 135ms, P90 = 137ms +head 10K: Avg = 1193ms, P90 = 1239ms +head 100K: Avg = 
1185ms, P90 = 1247ms + +`source=big5 | eval result = coalesce('', `host.name`) | head ` +head 10: Avg = 45ms, P90 = 62ms +head 100: Avg = 37ms, P90 = 39ms +head 1K: Avg = 148ms, P90 = 172ms +head 10K: Avg = 1177ms, P90 = 1177ms +head 100K: Avg = 1143ms, P90 = 1152ms + +## Related Issues +Addresses #[4005](https://github.com/opensearch-project/sql/issues/4005) + + +### Check List +- [✅] New functionality includes testing. +- [✅] New functionality has been documented. +- [✅] New functionality has javadoc added. +- [✅] New functionality has a user manual doc added. +- [✅] Commits are signed per the DCO using `--signoff`. +- [✅] Public documentation issue/PR [created](https://github.com/opensearch-project/sql/issues/4005). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +I have some high level questions: + +1. Is Automatic Type Coercion applied to all other functions as well? I'm just thinking +2. Besides UDF, did we consider other approaches? e.g., handle nonexistent or empty field in Analyzer layer? + + +### @RyanL1997 - COMMENTED + + +Hi @aalva500-prog, thanks for taking this on, and I just left some comments. + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:None` + + +(I haven't read through the implementation, just came across this PR.) + +A high level question: why are we allowing incompatible types for coalesce function? How will it know what type should the result be in advance?
+ +For example, in `eval res = coalesce(string, integer, ip, geopoint, date, timestamp, time, nested, struct)`, there are null values for different columns in different rows, `res` may be any of the 9 types. Then what should be the type of the final result column `res`? + + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Let's remove the unnecessary change + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +any reason we need to remove this? + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +nit: maybe not inline import + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:None` + + +To me, this returns the type of the first non-null operand, not the first non-null value. At compile time, you don't know which value will be non-null at runtime. + +e.g. +```bash +coalesce(null_field, "string", 123) +``` +If `null_field` is nullable string type, this returns `STRING`, but what if at runtime `null_field` is `null` and "string" is also `null`? Then `123` (`INTEGER`) would be returned, causing a type mismatch. + +Let me know if I missed some context. + + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +- Is it possible to use enum comparison instead of `equals()`? +- Should we do any safety check to make sure the conversion is safe before calling `stringValue()`? +- `trim().isEmpty()` means `" "` becomes empty - is this intended? + + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:None` + + +You are right to question this @yuancu. Please see my answer below and thanks for your insights!
+ +The current implementation replicates SPL's behavior, where `coalesce` performs no type checking and returns the value and data type of the first non-null argument. + +Your point about type safety is well-taken. The current implementation has a fundamental limitation: it permits incompatible types and uses a "first non-null type" approach for return type inference, which can cause runtime type errors. + +The challenge is that Calcite requires compile-time type determination, but with mixed incompatible types, the actual return type depends on runtime data availability. + +To address this concern, I propose the following approach: + +1. Try to find a least restrictive common type for compatible types +2. If that fails (incompatible types), fall back to VARCHAR/STRING as the universal type + +That being explained, while your example `coalesce(string, integer, ip, geopoint, date, timestamp, time, nested, struct)` is syntactically valid in SPL, it represents an uncommon use case. In practice, coalesce operations with such diverse data types may encounter type compatibility issues and should be used judiciously. I'll keep you posted when I implement a new solution, thanks again! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Not really, I'll put it back, thanks for catching this! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:None` + + +You are right, however, your description matches with how SPL works with different data types in `coalesce`. Nevertheless, I think I can improve this logic and handle it differently by doing an automatic type coercion, thanks for bringing this up! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +sure, I can do that, np! 
+ + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +sounds good! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +Hi @RyanL1997, thanks for these observations, please find below the changes I'll perform and the answer to your question about `trim.isEmpty()`: + +1. Enum comparison: I'll change it from `arg.type().typeName().equals("STRING")` to `ExprCoreType.STRING.equals(arg.type())` which is more type-safe and efficient. + +2. Safety check: The enum comparison `ExprCoreType.STRING.equals(arg.type())` inherently provides the safety check - Then I'll only call stringValue() when I've confirmed the type is STRING. + +3. `trim().isEmpty()` behavior: Yes, this is intentional. The enhanced coalesce skips whitespace-only strings treating them as "empty" values to find the next non-empty argument. What I want in this case is to skip not just null/missing values but also empty strings. + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +@RyanL1997 made some changes to my code, please review, thanks! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:None` + + +@RyanL1997 made some changes to my code, please review, thanks! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:None` + + +@yuancu made some changes to my code, please review, thanks! + + +### @RyanL1997 on `integ-test/src/test/java/org/opensearch/sql/security/CrossClusterCoalesceIT.java:None` + + +nit: remove these verbose comments + + +### @RyanL1997 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:476` + + +The `inCoalesceFunction` flag in context might not be thread-safe if the visitor is used concurrently. 
Also, consider if nested `COALESCE` calls are properly handled - the flag might get incorrectly reset if `COALESCE` is nested within another `COALESCE`. So can you confirm the above? + + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java:None` + + +No assertion? + + +### @dai-chen on `docs/user/ppl/functions/condition.rst:314` + + +Also highlight this in Behaviors section above? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:40` + + +"the least restrictive common type among all arguments", this is determined by Calcite? + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:476` + + +@RyanL1997 Thanks for raising this point. The current implementation handles your scenario with nested coalesce. Please see the screenshots below for reference. + +1) non-existing field in outer coalesce correctly returns field in inner coalesce: +Screenshot 2025-08-28 at 11 06 42 AM + +2) existing field in outer coalesce is correctly returned: +Screenshot 2025-08-28 at 11 09 12 AM + +3) non-existing field in both inner/outer coalesce: +Screenshot 2025-08-28 at 11 29 50 AM + + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java:40` + + +Hi @dai-chen, yes, the "least restrictive common type" is determined by Calcite's built-in type system in the `getReturnTypeInference()` method. The type inference does not happen in `createImplementor()`. The type inference happens earlier during Calcite's query planning phase when it calls `getReturnTypeInference()`. If Calcite can't find a common type, it falls back to `VARCHAR` as the most permissive type that can represent any value as a string. 
So `createImplementor()` doesn't determine the type - it just accesses the type that was already computed by the `getReturnTypeInference()` method during an earlier planning phase. The `createImplementor()` method generates the runtime execution code that uses the pre-determined type. + +#### Summary of how EnhancedCoalesceFunction works: +1) Function Registration: In `PPLBuiltinOperators.java` as `ENHANCED_COALESCE = new EnhancedCoalesceFunction().toUDF("COALESCE")` +2) UDF Creation: Called by `UserDefinedFunctionBuilder.toUDF()` to create the `SqlUserDefinedFunction` +3) Query Planning: Calcite uses it during type inference to determine the `COALESCE` result type +4) Type Inference Flow: + - Planning Phase: `getReturnTypeInference()` → Calcite's `leastRestrictive()` determines common type (e.g., `INTEGER`, `VARCHAR`) + - Code Generation: `createImplementor()` → `call.getType()` retrieves the already-determined type + - Runtime: `enhancedCoalesceWithType()` receives the type name and coerces the result accordingly + + +## General Comments + + +### @aalva500-prog + + +Hi @dai-chen, thanks for raising these points! Please find below my answers, as well as implementation and design decision: + +### 1) Is Automatic Type Coercion applied to all other functions as well? +As far as I'm concerned, no, automatic type coercion is NOT applied to all other functions. I'm not aware if the enhancements for the coalesce function are going to be applied to other functions. We may need to verify and confirm with the team. 
+ +Currently, the below is what we have: + +**For condition functions:** +- COALESCE & IFNULL: Get Calcite's automatic type coercion +- IS_NULL, IS_NOT_NULL, ISPRESENT: Don't need type coercion (single operand) +- EARLIEST, LATEST: Custom UDFs without automatic coercion +- IF, CASE: Use manual type checking + +**For `eval` related functions (Legacy engine):** +- No automatic type coercion - require strict type matching +- Use manual type checking and casting +- Implemented in the legacy expression evaluation system + +**Other functions WITHOUT Automatic Type Coercion:** +- UDF Functions: Use strict type checking via PPLTypeChecker +- Math Functions: Implement manual type handling (e.g., DivideFunction) +- Custom Functions: Require explicit type matching + +### 2) Besides UDF, did we consider other approaches? e.g., handle nonexistent or empty field in Analyzer layer? +Yes, other approaches were considered: +1. Analyzer Layer Handling (like eval functions): + - Yes, this could handle non-existent fields in ExpressionAnalyzer.visitIdentifier() + - However, it would require changes to core analysis pipeline + - Also, it would work for legacy engine but not leverage Calcite's capabilities + +2. Native Calcite Integration: Direct use of SqlStdOperatorTable.COALESCE + - This is the standard Calcite operator + - Type inference occurs in Calcite's leastRestrictive() method - Called by Calcite internally + - Error thrown by Calcite's type system - When it cannot find a common type + ``` + Cannot infer return type for COALESCE; operand types: [VARCHAR, INTEGER, VARCHAR] + ``` + +### 3) Why UDF Approach Was Chosen: +- Separation of Concerns: Field handling is done at the visitor level, not analyzer level. Type coercion is done at Calcite level +- Calcite Integration: Leverages Calcite's engine. 
UDF bridges PPL syntax to Calcite's native operators seamlessly +- Type System Compatibility: Leverages Calcite's sophisticated type coercion automatically +- Minimal Impact: Doesn't require changes to core analysis pipeline +- Performance: Calcite's optimized type handling vs manual eval function logic +- Future-proof: Works with Calcite engine evolution + +**The implementation shows a hybrid approach:** +- Field resolution handled in CalciteRexNodeVisitor (visitor layer) +- Type coercion delegated to Calcite's native COALESCE operator +- UDF wrapper provides the bridge between PPL and Calcite + +### 4) Why Creating Our Own Enhanced Coalesce Function: + +The data type evaluation happens entirely within Calcite's core libraries, not in this project's codebase. This project only calls SqlStdOperatorTable.COALESCE, and Calcite handles the rest. + +Currently COALESCE Data Type Evaluation in this Project works in the following way: + +1. Type Checking: +The following error comes from Calcite's core libraries: + ``` + Cannot infer return type for COALESCE; operand types: [VARCHAR, INTEGER, VARCHAR] + ``` + +The Calcite libraries are external dependencies that contain the rigid type system causing the mixed-type issues. + +2. How COALESCE Type Evaluation Works and Where It Happens: + * Calcite's SqlStdOperatorTable.COALESCE - This is the standard Calcite operator. + * Type inference occurs in Calcite's leastRestrictive() method - Called by Calcite internally + * Error thrown by Calcite's type system - When it cannot find a common type + +3. Currently, in this Project's Code: + - PPLFuncImpTable.java + ``` + return builder.makeCall(SqlStdOperatorTable.COALESCE, processedArgs); + ``` + + - ExtendedRexBuilder.java: + ``` + public RelDataType commonType(RexNode... nodes) { + return this.getTypeFactory() + .leastRestrictive(Arrays.stream(nodes).map(RexNode::getType).toList()); + } + ``` + +4. 
Calcite Libraries Location: + * Gradle Dependencies (build.gradle): + - api('org.apache.calcite:calcite-core:1.38.0') + - api 'org.apache.calcite:calcite-linq4j:1.38.0' + + * Physical JAR Files: + ``` + ~/.gradle/caches/modules-2/files-2.1/org.apache.calcite/calcite-core/1.38.0/ + ├── calcite-core-1.38.0.jar # Compiled classes + ├── calcite-core-1.38.0-sources.jar # Source code + └── calcite-core-1.38.0-javadoc.jar # Documentation + ``` + +5. COALESCE Implementation Files: + * SqlCoalesceFunction.java - Main COALESCE function definition + * RexImpTable$CoalesceImplementor.class - Runtime implementation + +6. Key Code in SqlCoalesceFunction.java: + - SqlCoalesceFunction + ``` + public SqlCoalesceFunction() { + super("COALESCE", + SqlKind.COALESCE, + ReturnTypes.LEAST_RESTRICTIVE.andThen(SqlTypeTransforms.LEAST_NULLABLE), // ← TYPE INFERENCE HERE + null, + OperandTypes.SAME_VARIADIC, // ← REQUIRES SAME TYPES + SqlFunctionCategory.SYSTEM); + } + ``` + + - COALESCE Rewrite Logic: + ``` + @Override public SqlNode rewriteCall(SqlValidator validator, SqlCall call) { + // COALESCE gets rewritten to CASE statement: + // COALESCE(a, b, c) becomes: + // CASE WHEN a IS NOT NULL THEN a + // WHEN b IS NOT NULL THEN b + // ELSE c END + } + ``` + + - The Problem: + * ReturnTypes.LEAST_RESTRICTIVE - Calls leastRestrictive() method + * OperandTypes.SAME_VARIADIC - - Expects all operands to be the same type + * When mixed types like [VARCHAR, INTEGER, VARCHAR]are passed: + - leastRestrictive() returns null + - Calcite throws "Cannot infer return type for COALESCE" + + 6. The Solution Needed: + To fix this, we would need to either: + * Modify Calcite's type inference system (not feasible) + * Register our custom function instead of using SqlStdOperatorTable.COALESCE (This is the approach I'm implementing) + + +### @ahkcs + + +For this test: `CrossClusterCoalesceIT.java` +I think we should add our test cases in `CrossClusterSearchIT` instead of creating our own ones? 
+ + +### @aalva500-prog + + +@ahkcs If the command/function enhancements you are implementing only work when Calcite is enabled, adding test cases in this `CrossClusterSearchIT` file will give errors. Maybe we can create a cross-cluster test file for Calcite only and add all our tests there. + + +### @RyanL1997 + + +Also, may need a rebase and resolve the conflict + + +--- + +# PR #4030: [Backport 2.19-dev] Skip script encoding when run explain with 'extended' (#3930) + +**URL:** https://github.com/opensearch-project/sql/pull/4030 + +**Author:** @LantaoJin + +**Created:** 2025-08-13T08:02:04Z + +**State:** MERGED + +**Merged:** 2025-08-13T20:59:51Z + +**Changes:** +39 -2 (7 files) + + +## Description + +(cherry picked from #3930 by commit 964d8b50d44af7377fa97f1f0acc680fef463b09) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4029: adding capability in SQLQueryUtils to identify if SQL query is for creating a table or not. + +**URL:** https://github.com/opensearch-project/sql/pull/4029 + +**Author:** @aggarwalmayank + +**Created:** 2025-08-13T07:15:04Z + +**State:** MERGED + +**Merged:** 2025-08-20T17:18:35Z + +**Changes:** +100 -7 (2 files) + +**Labels:** `enhancement`, `SQL`, `backport 2.x`, `backport-manually`, `backport-failed` + + +## Description + +### Description +[Describe what this change achieves] +adding capability in SQLQueryUtils to identify if SQL query is for creating a table or not. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + +NA + +### Check List +- [ Y] New functionality includes testing. +- [NA ] New functionality has been documented. + - [ NA] New functionality has javadoc added. + - [NA ] New functionality has a user manual doc added. 
+- [NA ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [ Y] Commits are signed per the DCO using `--signoff`. +- [ NA] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - DISMISSED + + +@ykmr1224 Please take another look. + + +## Review Comments + + +### @ykmr1224 on `async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java:None` + + +nit: let's put `s` at the end of the variable name (since it is a list) + + +### @ykmr1224 on `async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java:46` + + +Would it actually avoid parsing query twice? + + +### @aggarwalmayank on `async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java:46` + + +yes, users can use any of the method as per their requirement and the same visitor "SparkSqlTableNameVisitor" can be used + + +## General Comments + + +### @ykmr1224 + + +Need manual backport. + + +--- + +# PR #4028: [Backport 2.19-dev] Fix span on negative timestamp + +**URL:** https://github.com/opensearch-project/sql/pull/4028 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-13T02:37:30Z + +**State:** MERGED + +**Merged:** 2025-08-19T02:33:00Z + +**Changes:** +85 -11 (3 files) + + +## Description + +Backport ad3fc1f2926d916e19727e6d4b96c23fbecce325 from #4017. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @dai-chen + + +``` +2025-08-13T03:35:50.5364120Z 5093 tests completed, 1 failed, 150 skipped +2025-08-13T03:35:50.5365330Z Tests with failures: +2025-08-13T03:35:50.6363650Z - org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT.testQ7 +``` + +I've seen this failure before. Is it flaky? + + +### @noCharger + + +@qianheng-aws could you check the CI failure? + + +### @qianheng-aws + + +> I've seen this failure before. Is it flaky? +``` +org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT > testQ7 FAILED + java.net.SocketTimeoutException: 60,000 milliseconds timeout on connection http-outgoing-470 [ACTIVE] +``` +It's flaky and not related to this change. I've seen it several times before. + +Shall we disable q7 until we find the root cause of the flakiness? @LantaoJin + + +### @LantaoJin + + +> > I've seen this failure before. Is it flaky? +> +> ``` +> org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT > testQ7 FAILED +> java.net.SocketTimeoutException: 60,000 milliseconds timeout on connection http-outgoing-470 [ACTIVE] +> ``` +> +> It's flaky and not related to this change. I've seen it several times before. +> +> Shall we disable q7 until we find the root cause of the flakiness? @LantaoJin + +Okay, let me add some retry for q7. The q7 may fail sometimes with socket timeout on mac/windows. 
+ + +--- + +# PR #4025: Update delete_backport_branch workflow to include release-chores branches + +**URL:** https://github.com/opensearch-project/sql/pull/4025 + +**Author:** @RileyJergerAmazon + +**Created:** 2025-08-12T16:59:42Z + +**State:** MERGED + +**Merged:** 2025-10-07T17:49:25Z + +**Changes:** +11 -4 (1 files) + +**Labels:** `infrastructure`, `stalled` + + +## Description + +This PR updates the delete_backport_branch workflow to automatically delete branches that start with 'release-chores/' after they are merged, in addition to the existing condition for 'backport/' branches. + + + +## Reviews + + +### @dai-chen - COMMENTED + + +I think delete backport CI has been broken for a long while? + + +### @dai-chen - APPROVED + + +Thanks for the fix! Just want to confirm if PR status check is required: https://github.com/opensearch-project/OpenSearch/blame/main/.github/workflows/delete_backport_branch.yml + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RileyJergerAmazon + + +Hey @dai-chen, I updated it, and now it should start working again once merged. + + +--- + +# PR #4024: Implement type checking for aggregation functions with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/4024 + +**Author:** @yuancu + +**Created:** 2025-08-12T10:00:30Z + +**State:** MERGED + +**Merged:** 2025-08-15T05:13:14Z + +**Changes:** +387 -307 (5 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Previously, using aggregation functions on improper fields will throw errors at runtime (in both v2 and v3). This PR implements type checking for aggregation functions in OpenSearch PPL with Calcite enabled. + +Additionally, this PR refactored type checker to make its association with function implementation more straightforward. 
+ +### Related Issues +Resolves #4000 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +Basically LGTM. but with the refactor, developer has to re-learn how to dev a new UDF/UDAF. @yuancu could you create a document issue for how to develop a new UDF/UDAF with examples? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @yuancu + + +> could you create a document issue for how to develop a new UDF/UDAF with examples? + +No problem. #4043 created. + + + +### @LantaoJin + + +@yuancu need backporting manually. + + +--- + +# PR #4023: [Backport 3.2] Prevent aggregation push down when it has inner filter(#4002) + +**URL:** https://github.com/opensearch-project/sql/pull/4023 + +**Author:** @qianheng-aws + +**Created:** 2025-08-12T02:47:58Z + +**State:** MERGED + +**Merged:** 2025-08-13T02:29:01Z + +**Changes:** +159 -12 (9 files) + +**Labels:** `bug`, `PPL`, `calcite` + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Approved. But we need to check if it's already code freeze? 
@Swiddis + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4022: [Backport 2.19-dev] Prevent aggregation push down when it has inner filter + +**URL:** https://github.com/opensearch-project/sql/pull/4022 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-11T18:30:12Z + +**State:** MERGED + +**Merged:** 2025-08-18T08:21:42Z + +**Changes:** +159 -12 (9 files) + + +## Description + +Backport 6e3329faca3efd77a2f9746194664ccf8ea1df73 from #4002. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @qianheng-aws + + +CI failed because of q7 timeout. It's unrelated + + +--- + +# PR #4021: [Backport 3.2] Eliminate reliance on assert in Calcite for integration test + +**URL:** https://github.com/opensearch-project/sql/pull/4021 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-11T18:16:05Z + +**State:** MERGED + +**Merged:** 2025-08-11T20:19:38Z + +**Changes:** +125 -73 (5 files) + +**Labels:** `testing` + + +## Description + +Backport 569bf940dbb5ec06c27c109e8ef8d97f6d3e9a20 from #4016. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @dai-chen + + +CI / build-linux (24) failed on `2025-08-11T18:51:00.3477512Z * What went wrong: +2025-08-11T18:51:00.3477923Z Execution failed for task ':integ-test:startPrometheus'. +2025-08-11T18:51:00.3479613Z > de.undercouch.gradle.tasks.download.org.apache.hc.client5.http.ClientProtocolException: Service Unavailable (HTTP status code: 503, URL: https://github.com/prometheus/prometheus/releases/download/v2.37.2/prometheus-2.37.2.linux-amd64.tar.gz)` + + +### @dai-chen + + +Retrying. Will merge if still the same transient network issue. 
+ + +--- + +# PR #4020: Fix PPL eval command string concatenation with + operator + +**URL:** https://github.com/opensearch-project/sql/pull/4020 + +**Author:** @ahkcs + +**Created:** 2025-08-11T16:51:08Z + +**State:** MERGED + +**Merged:** 2025-08-19T07:12:00Z + +**Changes:** +119 -1 (2 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +# Fix PPL eval command string concatenation with + operator + +## Description + +This PR fixes a critical issue where string concatenation using the + operator in PPL eval commands would fail with a runtime error during Calcite query planning. Related #4014 + +## Problem + +PPL queries with string concatenation using the + operator were failing with: +``` +java.sql.SQLException: Error while preparing plan +LogicalProject... full_name=[+(+($1, ' '), $10)] +``` + +Example failing query: +```sql +source=accounts | eval full_name = firstname + " " + lastname | fields account_number, firstname, lastname, full_name +``` + +## Example Usage + +After this fix, the following PPL queries now work correctly: + +```sql +# Basic string concatenation +source=accounts | eval full_name = firstname + " " + lastname +``` + + + +## Reviews + + +### @yuancu - CHANGES_REQUESTED + + +I suggest you re-implement this PR in the following way (after #4024 is approved): + +```java +register( + ADD, + (RexBuilder builder, RexNode... args) -> + builder.makeCall(SqlStdOperatorTable.PLUS, args), + PPLTypeChecker.wrapFamily(OperandTypes.NUMERIC_NUMERIC)); + +registerOperator(ADD, SqlStdOperatorTable.CONCAT); +``` + + +This registers the same function with two different implementations, controlled via type checkers. + + +P.s. please make the PR title more specific. I.e. what enhancement it is. + + +### @yuancu - COMMENTED + + +I think there should be an integration test to verify result correctness of string concatenation with +. + + +### @qianheng-aws - COMMENTED + + +Please add ITs for this PR. 
+ + +## Review Comments + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Do we convert types implicitly? In that case, should we handle other types? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Let's extract and make `visitFunction` smaller for better readability. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + ++1. Is it possible to leverage current visitor pattern to dispatch instead of adding more and more conditional checks in a single visit method? + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Currently, we don't perform implicit type conversion - we only handle the case where both operands are already string types (VARCHAR/CHAR) or both numbers(already supported) + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +I have implemented a dispatch method to resolve this + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +This fixing can address the request but the exception logic is tricky to me. It skips the TypeChecker. Can we refactor it similar to https://github.com/opensearch-project/sql/pull/3821? cc @yuancu for further suggestions + + +### @Swiddis on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:None` + + +Do these tests cover what happens if there are null entries in the dataset? 
+ + +### @ahkcs on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:None` + + +I have added more tests to cover null entries scenario + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +I have refactored + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:694` + + +I would recommend registering `+` for string in a separate function signature. The current builder should support registering multiple times for the same function name with different signatures. And then its type checker should be simpler, should using PPLFamilyTypeChecker directly. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Do we really want register `ADD` function for string? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +This function is removed in #4024 as it is unnecessarily involved. Please refactor accordingly following #4024. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:None` + + +Please remove this from `PPLTypeChecker` interface. Please use `PPLTypeChecker.wrapComposite` instead. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +I think just `registerOperator(PLUS, SqlStdOperatorTable.CONCAT);` could implement this without specifying type checker, does it? @yuancu + +`registerOperator` already has capability to generate type checker itself automatically. + + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +You are right, I updated my suggestions on implementation to the following: + +```java +register( + ADD, + (RexBuilder builder, RexNode... 
args) -> + builder.makeCall(SqlStdOperatorTable.PLUS, args), + PPLTypeChecker.wrapFamily(OperandTypes.NUMERIC_NUMERIC)); + +registerOperator(ADD, SqlStdOperatorTable.CONCAT); +``` + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Removed + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Updated implementation + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:None` + + +Removed + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:694` + + +Updated implementation + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Is there any specific reason that you did not leverage `registerOperator(ADD, SqlStdOperatorTable.CONCAT);`? + +> P.s. please make the PR title more specific. I.e. what enhancement it is. + +A reminder ⬆️ + +P.s. please take some rest on Sunday 😂 + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +I assumed both registrations needed explicit type checking, I have made changes according to your suggestion + +P.S. Thanks for the reminder about rest! 😄 Will update the PR title to be more specific. + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:None` + + +I doubt that these tests really cover cases where null entries exist -- these tests only verify the correctness of plans. It's only during the execution of these plans that one will know what happens when there are null operands. 
+ + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:None` + + +Can be removed since concatenating string with null operands is actually verified in IT + + +### @ahkcs on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java:None` + + +Removed + + +## General Comments + + +### @LantaoJin + + +> I think there should be an integration test to verify result correctness of string concatenation with +. + +@ahkcs normally, we don't use `CalcitePPL*Test` to verify the result since the `CalcitePPL*Test` leverages Calcite catalog instead of ours which may have some logic gaps. `CalcitePPL*Test` is useful to verify the pure Calcite plan and SparkSQL generated. Instead, you can add an integration test in `CalcitePPL*IT` + + +### @ahkcs + + +@qianheng-aws @yuancu Added IT tests + + + + +--- + +# PR #4019: Add wildcard support for rename command + +**URL:** https://github.com/opensearch-project/sql/pull/4019 + +**Author:** @ritvibhatt + +**Created:** 2025-08-11T16:36:37Z + +**State:** MERGED + +**Merged:** 2025-09-08T18:46:25Z + +**Changes:** +732 -41 (11 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + - Added wildcard support for the rename command in PPL queries + - Implemented wildcard pattern matching using * to rename multiple fields at once + - Allow space delimiting + + +#### Changes + - Added WildcardRenameUtils utility class with pattern matching and transformation + logic + - Updated CalciteRelNodeVisitor.java to support wildcard renaming for Calcite engine and change behavior for renaming nonexisting fields/renaming to existing field + - Modified PPL parser to accept * in wildcard expressions and delimit with space + +Field Rename Behavior: +* **Renaming a non-existent field to a non-existent field**: No change occurs to the result set. 
+* **Renaming a non-existent field to an existing field**: The existing target field is removed from the result set. +* **Renaming an existing field to an existing field**: The existing target field is removed, then the source field is renamed to the target. + + +#### Examples + - ```source=accounts | rename *name as *_name``` -- firstname → first_name + - ```source=accounts | rename * as old_*``` -- all fields get "old_" prefix + - ```source=data | rename old_* as *``` -- removes "old_" prefix from matching fields + +- source and target field names must have a matching number of wildcards +- `*` matches all fields + +Performance testing: +Baseline: +```source=big5 | head ``` +baseline 10: Avg = 16ms, P90 = 17ms +baseline 100: Avg = 23ms, P90 = 24ms +baseline 1K: Avg = 95ms, P90 = 97ms +baseline 10K: Avg = 816ms, P90 = 821ms +baseline 100K: Avg = 812ms, P90 = 815ms + +```source=big5 | rename @timestamp as ts | head ``` +10: Avg = 16ms, P90 = 17ms +100: Avg = 24ms, P90 = 24ms +1K: Avg = 95ms, P90 = 97ms +10K: Avg = 816ms, P90 = 820ms +100K: Avg = 813ms, P90 = 817ms + +```source=big5 | rename @timestamp as ts, agent.type as agent_type | head ``` +10: Avg = 17ms, P90 = 18ms +100: Avg = 24ms, P90 = 25ms +1K: Avg = 97ms, P90 = 99ms +10K: Avg = 832ms, P90 = 835ms +100K: Avg = 828ms, P90 = 833ms + +```source=big5 | rename agent* as new_* | head ``` +10: Avg = 16ms, P90 = 17ms +100: Avg = 23ms, P90 = 24ms +1K: Avg = 95ms, P90 = 97ms +10K: Avg = 815ms, P90 = 818ms +100K: Avg = 813ms, P90 = 819ms + +```source=big5 | rename *name as *_name | head ``` +10: Avg = 16ms, P90 = 17ms +100: Avg = 23ms, P90 = 24ms +1K: Avg = 95ms, P90 = 97ms +10K: Avg = 814ms, P90 = 822ms +100K: Avg = 811ms, P90 = 815ms + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] +#4008 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the changes! + + +### @ykmr1224 - CHANGES_REQUESTED + + +Field name could contain `.` such as nested field. Wondering if that works fine with current implementation. + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/rename.rst:22` + + +Add a test case of multiple rename, e.g. `rename *name as *_name, *_name as *@name` + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/RenameCommandIT.java:None` + + +Does `rename f*r*t* as F*R*T*` supported? + + +### @penghuo on `core/src/test/java/org/opensearch/sql/utils/WildcardRenameUtilsTest.java:80` + + +add a test case for partial matched. e.g. `f*r*tname, F*R*Tname`, it should be FiRTname? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +What happen if origin field name include speicial * character, e.g. +``` +rename `field*` as `myfield*` +``` + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Are all these changes for V2 engine support? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +If match failed, query should works, but no field renamed, right? + + +### @dai-chen on `docs/user/ppl/cmd/rename.rst:85` + + +Is multiple pattern supported? e.g., `rename *name as *_name, *age as *_age`. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Why chagne Analyzer? It is for calcite.enable=false? If yes, it is not required. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:470` + + +does cascading rename use case supported?, e.g. ` e.g. 
rename *name as *_name, *_name as *@name` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Yes, removed all of the changes for v2 support + + +### @ritvibhatt on `integ-test/src/test/java/org/opensearch/sql/ppl/RenameCommandIT.java:None` + + +Yes, added test for partial match to ```CalcitePPLRenameIT``` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Can't match/replace wildcard characters in field names, this is SPL behavior as well. Added a note in ```rename.rst``` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Yes, updated to just not rename anything if wildcard doesn't match any fields + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Removed changes for v2 engine support + + +### @ritvibhatt on `docs/user/ppl/cmd/rename.rst:85` + + +Yes it is, added as example 4 in ```rename.rst``` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:470` + + +Updated to support cascading rename for both wildcard and no wildcard and added test in ```CalcitePPLRenameIT``` + + +### @penghuo on `docs/user/ppl/cmd/rename.rst:None` + + +Add since 3.3 to indicate this is a new feature + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Add IT for failure case, and assert error message. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Add IT for failure case, and assert error message. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Is it expected? 
+``` +### +POST {{baseUrl}}/tttt/_bulk +Content-Type: application/x-ndjson + +{ "index": { "_id": 1 } } +{ "value*": 10, "id123": 1 } + + +### +POST {{baseUrl}}/_plugins/_ppl/ +Content-Type: application/x-ndjson + +{ + "query": "source=tttt " +} + +### +POST {{baseUrl}}/_plugins/_ppl +Content-Type: application/x-ndjson + +{ + "query": "source=tttt | rename `value*` as cde*" +} + +### Results +{ + "schema": [ + { + "name": "cde*", + "type": "bigint" + }, + { + "name": "id123", + "type": "bigint" + } + ], + "datarows": [ + [ + 10, + 1 + ] + ], + "total": 1, + "size": 1 +} +``` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:470` + + +Changed to update field names after each rename clause so next rename can see changes instead of all at once after all renames. Added ```testCascadingRename``` and ```testCascadingRenameWithWildcard``` in ```CalcitePPLRenameIT``` + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Yes, the ```*``` will match the ```*``` in the other field name but it can't be replaced in the rename, so trying to rename ```value*``` as ```cde``` won't work + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +nit: any reason to separate line? (can we simply do `String newName = ...;`) + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Can we simply do like `if (newNames.contains(newName) && ...)`? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Better extracting as `removeFieldIfExist(...)` + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Does this mean we call `rename` repeatedly? +Do we need to call it multiple time? Is it enough to call it once at the end of this method? 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Is it fine to directly match with targetPattern? (Does this mean we don't care wildcard here?) + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/utils/WildcardRenameUtilsTest.java:17` + + +nit: Missing `public` + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/utils/WildcardRenameUtilsTest.java:None` + + +Why LinkedHashSet? Plus, it is better using immutable collection such as `ImmutableSet.of(...)` whenever possible. (mutable objects tend to cause problems, and use of immutable collection is recommended) + +Let's rename it to `availableFields` for better clarity. + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/utils/WildcardRenameUtilsTest.java:None` + + +nit: It is better splitting method than making code block by comment. Same for other places. + + +## General Comments + + +### @LantaoJin + + +Backporting is required manually in this PR. @ykmr1224 + + +### @ritvibhatt + + +@LantaoJin I raised the manual backport (#4250), trying to figure out test failures + + +--- + +# PR #4018: Make fields optional parameter in multi field relevance function. + +**URL:** https://github.com/opensearch-project/sql/pull/4018 + +**Author:** @vamsimanohar + +**Created:** 2025-08-11T16:27:02Z + +**State:** MERGED + +**Merged:** 2025-08-21T05:36:03Z + +**Changes:** +718 -76 (25 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + +**Assignees:** @vamsimanohar + + +## Description + +### Description + +This PR is the first step in making it easier to search in PPL. 
We want to support a simple "free text search" like the one described in [this issue](https://github.com/opensearch-project/sql/issues/4007) + +The goal is to let you write a simple search like this: +`search source = my_index "fatal error"` + +And our system will automatically translate it to this behind the scenes: +`search source = my_index | where simple_query_string("fatal error")` + + + +### Changes in this PR + +To prepare for the new free text search, I'm updating how a few of our search functions work. + +Now, you won't always have to provide a list of fields to search in. The `fields` parameter is now optional for functions like `simple_query_string` and `multi_match`. + +If you don't specify any fields, the query will automatically search the default fields set up for that index (specifically, in the `index.query.default_field` setting). + +Updated Documentation can be found here: [PPL Relevance Functions](https://github.com/vamsimanohar/sql/blob/search_text/docs/user/ppl/functions/relevance.rst) + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - CHANGES_REQUESTED + + +Please add some docs (comments or otherwise) to explain how this works. I'm able to follow individual methods and I _think_ I see an entrypoint, but I don't see how everything is linking together. + + +### @penghuo - DISMISSED + + +@songkant-aws Please take a look. + + +## Review Comments + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/expression/function/udf/RelevanceQueryFunction.java:39` + + +Can you add a doc explaining what the fields actually do here? 
I can follow the field parsing but it's not clear what their use is, or how this is related to "operand metadata" + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java:75` + + +Is it cleaner to create the ImmutableMap once here instead of building a Map from the fields list and freezing it at the end? + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/SimpleQueryStringQuery.java:24` + + +If I'm understanding right, this whole PR is just plumbing for this part where we build a query string and add some fields, right? + + +### @Swiddis on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:624` + + +I notice we're reimplementing this "build if present" logic multiple times for fields (I count at least 4). + +Is there some sort of safer helper we can provide to make this easier for callers, or are they all doing different things? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/expression/function/udf/RelevanceQueryFunction.java:39` + + +I have already added the documentation in description for fields. + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/SimpleQueryStringQuery.java:24` + + +yes...fields is not a required parameter, if not provided it will default to index.default.query_field setting. + + +### @vamsimanohar on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java:75` + + +I didn't get it..would you elaborate more? + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:624` + + +Are you talking about + +``` + if (!fields.isEmpty()) { + builder = builder.fields(fields); + } + ``` + + Are you taking about this? 
+ + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:624` + + +These are only 2-3 lines with different builder types. I don't see pull into helper functions would make it any better. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +If read default fields from index.query.default_field setting, then other change is not necessary? + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +The current changes are not big in the other files except for more test files and also would leave the setting dependency to underlying DSL implementation. + +Also, the above setting is a index specific setting, I need to somehow pass index into the context and add additional dependency on get Index settings call where the changes would be become more or less similar. + + +@penghuo let me know if you still want to do that for other reasons. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +make sense, I thought it is cluster setting. +for multiiple indicies case, DSL will handle it? + + + +### @vamsimanohar on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Good one, I need to see the behavior. IMO, it should handle. + + +### @songkant-aws on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:624` + + +With this check in AstBuilder, it will not produce RelevanceFieldList, right? In that case, I think you don't need the empty list check logic in `CalciteRexNodeVisitor.visitRelevanceFieldList` logic because it will not analyze that RexNode. + +Another option is to add empty RelevanceFieldList here anyway. Then no UDF metadata check logic is changed. The function will look like: `simple_query_string(NULL AS MAP, RexCall(MAP...), ...)`. 
And it may probably simplify some logic in PredicateAnalyzer by checking NULL literal instead of verifying parameters on different startIndices. + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:624` + + +My bad, this file's changes were not removed. Let me see the other approach if it reduces the code. + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:624` + + +I went with removing the changes in RelevanceFieldList as the second approach is forcing me to change some V2 engine files and tests. + + +## General Comments + + +### @vamsimanohar + + +The integ test failure is unrelated to this PR + + +--- + +# PR #4017: Fix span on negative timestamp + +**URL:** https://github.com/opensearch-project/sql/pull/4017 + +**Author:** @qianheng-aws + +**Created:** 2025-08-11T09:31:42Z + +**State:** MERGED + +**Merged:** 2025-08-13T02:37:13Z + +**Changes:** +85 -11 (3 files) + +**Labels:** `bug`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description +Fix span on negative timestamp + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3827 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @dai-chen on `core/src/test/java/org/opensearch/sql/utils/DateTimeUtilsTest.java:21` + + +np: this is actually UT for `DateTimeUnit`? 
+ + +### @dai-chen on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:71` + + +Not sure if I understand correctly: this can be abstracted as + +``` +static long roundMonth(long utcMillis, int interval) { return roundByMonths(utcMillis, interval); } +static long roundQuarter(long utcMillis, int interval) { return roundByMonths(utcMillis, interval * 3); } +static long roundYear(long utcMillis, int interval) { return roundByMonths(utcMillis, interval * 12); } +``` + + +### @qianheng-aws on `core/src/test/java/org/opensearch/sql/utils/DateTimeUtilsTest.java:21` + + +No, It's actually testing the `round` API for each DateTimeUnit, and those APIs are only called by `DateTimeUtilsT`, so I put the test case here. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:71` + + +Agree, seems that could be abstracted. This PR focuses on the bugfix and just keeps the existing implementation without too much refinement. Maybe that refinement could be done next time if we need further related change. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4016: Eliminate reliance on assert in Calcite for integration test + +**URL:** https://github.com/opensearch-project/sql/pull/4016 + +**Author:** @yuancu + +**Created:** 2025-08-11T04:15:16Z + +**State:** MERGED + +**Merged:** 2025-08-11T18:15:52Z + +**Changes:** +125 -73 (5 files) + +**Labels:** `testing`, `backport 3.2` + + +## Description + +### Description + +Previously, the test `failWhenNumOfColumnsNotMatchOutputOfSubquery` relied on `assert` for integration test. However, assert is ignored in production in Java by default. This PR addresses the problem by eliminating the reliance on `assert` in the test. + +Additionally, this PR enforces `script.disable_max_compilations_rate increases` to `true` for SQL & PPL integration tests, and sets `script.context.*.max_compilations_rate` to `unlimited` as a fallback. 
+ +### Related Issues +Resolves #3896 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the fix! I need to merge this to unblock 3.2 release if no major comment. + + +## Review Comments + + +### @LantaoJin on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:None` + + +No need to add these config keys if they are used only for IT. + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java:343` + + +Just wonder what's the difference between this setting and `script.max_compilations_rate`? + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java:343` + + +`script.max_compilations_rate` is deprecated. + + +### @yuancu on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:None` + + +Removed from settings. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4015: Publish legacy module for downstream reuse in unified-ppl + +**URL:** https://github.com/opensearch-project/sql/pull/4015 + +**Author:** @Mebsina + +**Created:** 2025-08-08T23:31:21Z + +**State:** MERGED + +**Merged:** 2025-08-21T00:17:08Z + +**Changes:** +1 -1 (1 files) + + +## Description + +### Description +Adding `legacy` submodule because `datasources` depends on it. 
+ +``` +./gradlew publishUnifiedQueryPublicationToMavenLocal + +tree -d ~/.m2/repository/org/opensearch/query/unified-query-legacy + +└── 3.1.0.0-SNAPSHOT +``` + +### Related Issues +[3935](https://github.com/opensearch-project/sql/pull/3935) + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4013: [Backport 2.19-dev] fix snapshot uploading (#4006) + +**URL:** https://github.com/opensearch-project/sql/pull/4013 + +**Author:** @ahkcs + +**Created:** 2025-08-08T18:17:24Z + +**State:** MERGED + +**Merged:** 2025-08-08T19:27:21Z + +**Changes:** +584 -0 (3 files) + + +## Description + +Backporting PR https://github.com/opensearch-project/sql/pull/4006 to 2.x + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4012: [Backport 2.x] fix snapshot uploading (#4006) + +**URL:** https://github.com/opensearch-project/sql/pull/4012 + +**Author:** @ahkcs + +**Created:** 2025-08-08T18:13:36Z + +**State:** MERGED + +**Merged:** 2025-08-08T19:26:25Z + +**Changes:** +584 -0 (3 files) + + +## Description + +Backporting PR https://github.com/opensearch-project/sql/pull/4006 to 2.x + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4011: [Backport 2.19-dev] Add missing udfs in v3 + 
+**URL:** https://github.com/opensearch-project/sql/pull/4011 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-08T08:17:10Z + +**State:** MERGED + +**Merged:** 2025-08-11T02:18:26Z + +**Changes:** +438 -9 (7 files) + + +## Description + +Backport d6aac3c8943dd9c946f1859638e0fe446045e147 from #3957. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4010: Support script push down on text field + +**URL:** https://github.com/opensearch-project/sql/pull/4010 + +**Author:** @qianheng-aws + +**Created:** 2025-08-08T04:45:53Z + +**State:** MERGED + +**Merged:** 2025-08-19T09:13:59Z + +**Changes:** +183 -81 (9 files) + +**Labels:** `enhancement`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Support script push down on text field + +For the new added TODO in the PR, I've created an related issue in core and could be tracked here: https://github.com/opensearch-project/OpenSearch/issues/18985 + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4003, +https://github.com/opensearch-project/sql/issues/3950 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:154` + + +minor: why we limit the explain test on pushdown enabled only? 
we'd better assert the plan without pushdown + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:348` + + +Can we add a UT for this case since we are removing this limitation. + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:154` + + +The test only wants to verify the feature of script push down on text type is able to take effect. + +For no pushdown, it has no need to add so many tests on types of text or non-text, since that makes no difference for no pushdown case + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:348` + + +Done + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:195` + + +This may have significant performance impact right? Just wonder do we really need to support text field without keyword/docvalue? I assume this is new added feature compared with V2. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:195` + + +Yes, it does. But comparing to fetch source to coordinator, it allows parallel execution and no need to fetch source across nodes. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:195` + + +Is there any query in big5 or clickbench benchmark to support the performance improvement? 
+ + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:195` + + +No, but I've generated some queries run against big5: +``` +// q1 , will match and return the first row in the first window +source = big5 | where message = "2024-03-13T06:53:15.372Z Mar 13 06:53:15 ip-30-207-142-94 udev: whitehare"| head 10 + +// q2, no rows will match after changing the time to 2034 +source = big5 | where message = "2034-03-13T06:53:15.372Z Mar 13 06:53:15 ip-30-207-142-94 udev: whitehare"| head 10 + +// q3, the first window will satisfy retrieving 10 rows. +source = big5 | where length(message) > 10 | head 10 + +// q4 +source = big5 | where length(message) > 200 | head 10 +``` + +The benchmark results is: +| query | Before(NO PUSH) | After(PUSH DOWN) | +| --- | --- | --- | +| q1 | timeout, need fetch all rows | 1m1.101s | +| q2 | timeout, need fetch all rows | 1m0.192s | +| q3 | 0.215s | 0.125s | +| q4 | timeout, need fetch all rows | 0.525s | + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4006: fix snapshot uploading + +**URL:** https://github.com/opensearch-project/sql/pull/4006 + +**Author:** @ahkcs + +**Created:** 2025-08-07T21:27:37Z + +**State:** MERGED + +**Merged:** 2025-08-08T17:48:05Z + +**Changes:** +8 -5 (3 files) + +**Labels:** `infrastructure`, `backport 2.x`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +Update snapshot uploading + +I changed the path to fetch the publish-snapshot.sh(https://github.com/opensearch-project/opensearch-build/blob/main/publish/publish-snapshot.sh) from opensearch-build-libraries to opensearch-build since this file no longer exists in opensearch-build-libraries + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @noCharger on `.github/workflows/publish-async-query-core.yml:74` + + +can we make it a var so it can be reused in multiple files? 
+ + +## General Comments + + +### @noCharger + + +Let's add backport labels to corresponding branches + + +--- + +# PR #4002: Prevent aggregation push down when it has inner filter + +**URL:** https://github.com/opensearch-project/sql/pull/4002 + +**Author:** @qianheng-aws + +**Created:** 2025-08-07T07:29:09Z + +**State:** MERGED + +**Merged:** 2025-08-11T18:29:58Z + +**Changes:** +159 -12 (9 files) + +**Labels:** `bug`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev`, `backport 3.2` + + +## Description + +### Description +Prevent aggregation push down when it has inner filter, also prevent push down agg on window function. + + +### Related Issues +Resolves +https://github.com/opensearch-project/sql/issues/3996 +https://github.com/opensearch-project/sql/issues/3999 +https://github.com/opensearch-project/sql/issues/4009 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3996.yml:None` + + +can be removed + + +### @LantaoJin on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3996.yml:None` + + +ditto + + +## General Comments + + +### @LantaoJin + + +Since this patch fixes a correctness issue, I suggest to backport it to 3.2. 
+ + +--- + +# PR #4001: Allow equal expression as a function argument + +**URL:** https://github.com/opensearch-project/sql/pull/4001 + +**Author:** @yuancu + +**Created:** 2025-08-07T06:05:05Z + +**State:** MERGED + +**Merged:** 2025-08-18T07:26:14Z + +**Changes:** +24 -4 (3 files) + +**Labels:** `bug`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description + +This PR allows comparison expressions with equal as a function argument. + +Previously, arguments in expressions like `eval res = foo(param1=1, param2='b')` would be treated as named arguments. They would match the following rule: + +```antlr +functionArg + : (ident EQUAL)? functionArgExpression + ; +``` + +However, this prevented users from passing comparison expressions like `field=target` as a parameter, which are widely used in condition functions like `IF`, `CASE`, etc. + +I found that named parameters are used exclusively in table functions. This PR fixes the problem by eliminating named parameters in other functions. + +### Related Issues +Resolves #3992 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:606` + + +If users name the argument in the meanwhile, it will be `if(isNameJake=name='Jake' ...)` ? + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:606` + + +After the change, only table functions support named arguments. `IF` is not one of them. `if(isNameJake=name='Jake' ...)` will not be a valid expression. 
For table functions, based on the following rules: + +```antlr +tableFunction + : qualifiedName LT_PRTHS namedFunctionArgs RT_PRTHS + ; + +namedFunctionArg + : (ident EQUAL)? functionArgExpression + ; + +functionArgExpression + : lambda + | logicalExpression + ; + +logicalExpression + : NOT logicalExpression # logicalNot + | left = logicalExpression AND right = logicalExpression # logicalAnd + | left = logicalExpression XOR right = logicalExpression # logicalXor + | left = logicalExpression OR right = logicalExpression # logicalOr + | expression # logicalExpr + ; +``` + +I think you are right. + +BTW, I haven't found any official definitions for table functions in OpenSearch, although they present in the grammar file. Do you know some examples of them? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:606` + + +I see. Could you create follow up issue if so? To support both, we may need breaking change right? +I recall table function was related to PromQL support. Ref: https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/admin/connectors/prometheus_connector.rst#query_range-table-function + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:606` + + +Thank you for the source! Do you mean to support named arguments for all functions by *to support both*? I am a little confused in why should we do so. Afaik, named arguments isn't mentioned anywhere in PPL specifications. And we had no plan to support out-of-order arguments even for functions with named arguments. Moreover, it's hard to tell whether `if(a=bool_column, ...)` is a named argument or a logical expression if we allow named arguments for all functions. + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:606` + + +Sorry for confusion. I asked because I saw `if(predicate:code=200` in SPL doc: https://help.splunk.com/en/splunk-cloud-platform/search/spl2-search-manual/functions/naming-function-arguments. Never mind if this is not planned. 
+ + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:606` + + +Thanks for the explanation. That's a specific grammar. They use `:` to avoid the dilemma of telling apart logical comparison and assignment, + + +## General Comments + + +### @yuancu + + +@qianheng-aws Can you help merge this PR? Thanks! + + +### @yuancu + + +@LantaoJin Should this PR be backported? + + +--- + +# PR #3993: Support timechart command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3993 + +**Author:** @selsong + +**Created:** 2025-08-06T17:52:28Z + +**State:** MERGED + +**Merged:** 2025-09-04T05:49:44Z + +**Changes:** +1898 -2 (25 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Implement timechart in PPL to accept aggregation` span` logic and aggregation functions. This includes support for parameters such as `span`, `limit`, `useother`, a single aggregation function, by field. This does not include pivot formatting. + +**Time Binning** +- Currently the default behavior for time binning is span=1m because the bin command is being implemented and has not been merged yet. Once bin is merged, the default behavior for time binning will be bins=100. Timechart will then also be modified in a 2nd PR to support all bin options including span, bin, minspan, aligntime. + +**Limitations:** (to be resolved in follow up PR) +* Pivot is not supported in timechart. +* Only a single aggregation function is supported per timechart command. +* The ``bins`` parameter and other bin options are not supported since the ``bin`` command is not implemented yet. + +### Related Issues +Resolves #3965 + +### 1. 
single aggregation function +**Query:** +``` +source=events | timechart span=1m avg(cpu_usage) +``` + +**Result:** +``` +{ + "schema": [ + { + "name": "@timestamp", + "type": "timestamp" + }, + { + "name": "avg(cpu_usage)", + "type": "float" + } + ], + "datarows": [ + [ + "2024-07-01 00:00:00", + 45.2 + ], + [ + "2024-07-01 00:01:00", + 38.7 + ], + [ + "2024-07-01 00:02:00", + 55.3 + ], + [ + "2024-07-01 00:03:00", + 42.1 + ], + [ + "2024-07-01 00:04:00", + 41.8 + ] + ], + "total": 5, + "size": 5 +} +``` + +**Logical Plan:** +``` +"LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], dir0=[ASC])\n LogicalAggregate(group=[{1}], agg#0=[AVG($0)])\n LogicalProject(cpu_usage=[$2], $f2=[SPAN($1, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n" +``` + +**Physical Plan:** +``` +"EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#377:LogicalAggregate.NONE.[](input=RelSubset#376,group={1},agg#0=AVG($0)), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"$f2\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"$f1\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" +``` + +### 2. 
aggregation with by field +**Query:** +``` +source=events | timechart span=1h count() by host +``` + +**Result:** +``` +{ + "schema": [ + { + "name": "@timestamp", + "type": "timestamp" + }, + { + "name": "host", + "type": "string" + }, + { + "name": "count", + "type": "bigint" + } + ], + "datarows": [ + [ + "2024-07-01 00:00:00", + "web-01", + 1 + ], + [ + "2024-07-01 00:01:00", + "web-02", + 1 + ], + [ + "2024-07-01 00:02:00", + "web-01", + 1 + ], + [ + "2024-07-01 00:03:00", + "db-01", + 1 + ], + [ + "2024-07-01 00:04:00", + "web-02", + 1 + ] + ], + "total": 5, + "size": 5 +}% +``` + +**Logical Plan:** +``` + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n LogicalAggregate(group=[{0, 1}], count=[SUM($2)])\n LogicalProject(@timestamp=[$1], host=[CASE(IS NOT NULL($3), $0, 'OTHER')], count=[$2])\n LogicalJoin(condition=[=($0, $3)], joinType=[left])\n LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()])\n LogicalProject(host=[$0], $f2=[SPAN($1, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10])\n LogicalAggregate(group=[{0}], grand_total=[SUM($2)])\n LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()])\n LogicalProject(host=[$0], $f2=[SPAN($1, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n" +``` + +**Physical Plan:** +``` +"EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n EnumerableAggregate(group=[{0, 1}], count=[$SUM0($2)])\n EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=['OTHER'], expr#7=[CASE($t5, $t0, $t6)], @timestamp=[$t1], host=[$t7], count=[$t2])\n EnumerableMergeJoin(condition=[=($0, $3)], joinType=[left])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#795:LogicalAggregate.NONE.[](input=RelSubset#794,group={0, 1},agg#0=COUNT()), 
SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f2\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"$f2_0\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableLimit(fetch=[10])\n EnumerableSort(sort0=[$1], dir0=[DESC])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#837:LogicalAggregate.NONE.[](input=RelSubset#794,group={0},grand_total=COUNT())], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"grand_total\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" +``` +### 3. 
`span` parameter +**Query:** +``` +source=events | timechart span=1s count() by region +``` + +**Result:** +``` +{ + "schema": [ + { + "name": "@timestamp", + "type": "timestamp" + }, + { + "name": "region", + "type": "string" + }, + { + "name": "count", + "type": "bigint" + } + ], + "datarows": [ + [ + "2024-07-01 00:00:00", + "us-east", + 1 + ], + [ + "2024-07-01 00:01:00", + "us-west", + 1 + ], + [ + "2024-07-01 00:02:00", + "us-east", + 1 + ], + [ + "2024-07-01 00:03:00", + "eu-west", + 1 + ], + [ + "2024-07-01 00:04:00", + "us-west", + 1 + ] + ], + "total": 5, + "size": 5 +} +``` + +**Logical Plan:** +``` + "LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n LogicalAggregate(group=[{0, 1}], count=[SUM($2)])\n LogicalProject(@timestamp=[$1], region=[CASE(IS NOT NULL($3), $0, 'OTHER')], count=[$2])\n LogicalJoin(condition=[=($0, $3)], joinType=[left])\n LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()])\n LogicalProject(region=[$3], $f2=[SPAN($1, 1, 's')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10])\n LogicalAggregate(group=[{0}], grand_total=[SUM($2)])\n LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()])\n LogicalProject(region=[$3], $f2=[SPAN($1, 1, 's')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n" +``` + +**Physical Plan:** +``` +"EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n EnumerableAggregate(group=[{0, 1}], count=[$SUM0($2)])\n EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=['OTHER'], expr#7=[CASE($t5, $t0, $t6)], @timestamp=[$t1], region=[$t7], count=[$t2])\n EnumerableMergeJoin(condition=[=($0, $3)], joinType=[left])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#1413:LogicalAggregate.NONE.[](input=RelSubset#1412,group={0, 
1},agg#0=COUNT()), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"region\":{\"terms\":{\"field\":\"region.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f2\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1s\"}}}]},\"aggregations\":{\"$f2_0\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableLimit(fetch=[10])\n EnumerableSort(sort0=[$1], dir0=[DESC])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#1455:LogicalAggregate.NONE.[](input=RelSubset#1412,group={0},grand_total=COUNT())], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"region\":{\"terms\":{\"field\":\"region.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"grand_total\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" +``` +### 4. Default `limit` = 10 and `useother` = true +For by fields with more than 10 values, timechart will automatically group the extra values into an OTHER field. 
+**Query:** +``` +source=events_many_hosts | timechart span=1h avg(cpu_usage) by host +``` + +**Result:** +``` +{ + "schema": [ + { + "name": "@timestamp", + "type": "timestamp" + }, + { + "name": "host", + "type": "string" + }, + { + "name": "avg(cpu_usage)", + "type": "double" + } + ], + "datarows": [ + [ + "2024-07-01 00:00:00", + "OTHER", + 35.900001525878906 + ], + [ + "2024-07-01 00:00:00", + "web-01", + 45.20000076293945 + ], + [ + "2024-07-01 00:00:00", + "web-02", + 38.70000076293945 + ], + [ + "2024-07-01 00:00:00", + "web-03", + 55.29999923706055 + ], + [ + "2024-07-01 00:00:00", + "web-04", + 42.099998474121094 + ], + [ + "2024-07-01 00:00:00", + "web-05", + 41.79999923706055 + ], + [ + "2024-07-01 00:00:00", + "web-06", + 39.400001525878906 + ], + [ + "2024-07-01 00:00:00", + "web-07", + 48.599998474121094 + ], + [ + "2024-07-01 00:00:00", + "web-08", + 44.20000076293945 + ], + [ + "2024-07-01 00:00:00", + "web-09", + 67.80000305175781 + ], + [ + "2024-07-01 00:00:00", + "web-11", + 43.099998474121094 + ] + ], + "total": 11, + "size": 11 +} +``` +**Logical Plan:** +``` +"LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)])\n LogicalProject(@timestamp=[$1], host=[CASE(IS NOT NULL($3), $0, 'OTHER')], avg(cpu_usage)=[$2])\n LogicalJoin(condition=[=($0, $3)], joinType=[left])\n LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)])\n LogicalProject(host=[$1], cpu_usage=[$0], $f3=[SPAN($2, 1, 'h')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10])\n LogicalAggregate(group=[{0}], grand_total=[SUM($2)])\n LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)])\n LogicalProject(host=[$1], cpu_usage=[$0], $f3=[SPAN($2, 1, 'h')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n", +``` +**Physical Plan:** +``` + 
"EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)])\n EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=['OTHER'], expr#7=[CASE($t5, $t0, $t6)], @timestamp=[$t1], host=[$t7], avg(cpu_usage)=[$t2])\n EnumerableMergeJoin(condition=[=($0, $3)], joinType=[left])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#1926:LogicalAggregate.NONE.[](input=RelSubset#1925,group={0, 2},agg#0=AVG($1)), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f3\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1h\"}}}]},\"aggregations\":{\"$f2\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableLimit(fetch=[10])\n EnumerableSort(sort0=[$1], dir0=[DESC])\n EnumerableAggregate(group=[{0}], grand_total=[SUM($2)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#1926:LogicalAggregate.NONE.[](input=RelSubset#1925,group={0, 2},agg#0=AVG($1)), SORT->[0]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f3\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1h\"}}}]},\"aggregations\":{\"$f2\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" +``` +### 5. `limit` parameter +**Query:** +``` +source=events_many_hosts | timechart span=1m limit=3 avg(response_time) by host +``` +Lowest sum of aggregation function results are grouped into the OTHER column so only top 3 hosts by avg(response_time) are shown, displayed in their original order. +**Result:** +``` +{ + "schema": [ + { + "name": "@timestamp", + "type": "timestamp" + }, + { + "name": "host", + "type": "string" + }, + { + "name": "avg(cpu_usage)", + "type": "double" + } + ], + "datarows": [ + [ + "2024-07-01 00:00:00", + "OTHER", + 330.4000015258789 + ], + [ + "2024-07-01 00:00:00", + "web-03", + 55.29999923706055 + ], + [ + "2024-07-01 00:00:00", + "web-07", + 48.599998474121094 + ], + [ + "2024-07-01 00:00:00", + "web-09", + 67.80000305175781 + ] + ], + "total": 4, + "size": 4 +} +``` + +**Logical Plan:** +``` + "LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)])\n LogicalProject(@timestamp=[$1], host=[CASE(IS NOT NULL($3), $0, 'OTHER')], avg(cpu_usage)=[$2])\n LogicalJoin(condition=[=($0, $3)], joinType=[left])\n LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)])\n LogicalProject(host=[$1], cpu_usage=[$0], $f3=[SPAN($2, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n LogicalSort(sort0=[$1], 
dir0=[DESC], fetch=[3])\n LogicalAggregate(group=[{0}], grand_total=[SUM($2)])\n LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)])\n LogicalProject(host=[$1], cpu_usage=[$0], $f3=[SPAN($2, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n", +``` + +**Physical Plan:** +``` +"EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)])\n EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=['OTHER'], expr#7=[CASE($t5, $t0, $t6)], @timestamp=[$t1], host=[$t7], avg(cpu_usage)=[$t2])\n EnumerableMergeJoin(condition=[=($0, $3)], joinType=[left])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#1663:LogicalAggregate.NONE.[](input=RelSubset#1662,group={0, 2},agg#0=AVG($1)), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f3\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"$f2\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableLimit(fetch=[3])\n EnumerableSort(sort0=[$1], dir0=[DESC])\n EnumerableAggregate(group=[{0}], grand_total=[SUM($2)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#1663:LogicalAggregate.NONE.[](input=RelSubset#1662,group={0, 2},agg#0=AVG($1)), SORT->[0]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f3\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"$f2\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" +``` +### 6. `useother` parameter +**Query:** +``` +source=events_many_hosts | timechart span=1m useother=f avg(response_time) by host +``` +Accepts useother=t, f, true, or false. Default limit of 10 is applied since no limit is specified, but OTHER column is omitted since useother is set to false. +**Result:** +``` +{ + "schema": [ + { + "name": "@timestamp", + "type": "timestamp" + }, + { + "name": "host", + "type": "string" + }, + { + "name": "avg(cpu_usage)", + "type": "double" + } + ], + "datarows": [ + [ + "2024-07-01 00:00:00", + "web-01", + 45.20000076293945 + ], + [ + "2024-07-01 00:00:00", + "web-02", + 38.70000076293945 + ], + [ + "2024-07-01 00:00:00", + "web-03", + 55.29999923706055 + ], + [ + "2024-07-01 00:00:00", + "web-04", + 42.099998474121094 + ], + [ + "2024-07-01 00:00:00", + "web-05", + 41.79999923706055 + ], + [ + "2024-07-01 00:00:00", + "web-06", + 39.400001525878906 + ], + [ + "2024-07-01 00:00:00", + "web-07", + 48.599998474121094 + ], + [ + "2024-07-01 00:00:00", + "web-08", + 44.20000076293945 + ], + [ + "2024-07-01 00:00:00", + "web-09", + 67.80000305175781 + ], + [ + "2024-07-01 00:00:00", + "web-11", + 43.099998474121094 + ] + ], + "total": 10, + "size": 10 +} +``` + +**Logical Plan:** +``` + "LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n 
LogicalFilter(condition=[<>($1, 'OTHER')])\n LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)])\n LogicalProject(@timestamp=[$1], host=[CASE(IS NOT NULL($3), $0, 'OTHER')], avg(cpu_usage)=[$2])\n LogicalJoin(condition=[=($0, $3)], joinType=[left])\n LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)])\n LogicalProject(host=[$1], cpu_usage=[$0], $f3=[SPAN($2, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10])\n LogicalAggregate(group=[{0}], grand_total=[SUM($2)])\n LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)])\n LogicalProject(host=[$1], cpu_usage=[$0], $f3=[SPAN($2, 1, 'm')])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n", +``` + +**Physical Plan:** +``` +"EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)])\n EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t0)], expr#6=['OTHER'], expr#7=[CASE($t5, $t2, $t6)], @timestamp=[$t3], host=[$t7], avg(cpu_usage)=[$t4])\n EnumerableMergeJoin(condition=[=($0, $2)], joinType=[inner])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $condition=[$t2])\n EnumerableLimit(fetch=[10])\n EnumerableSort(sort0=[$1], dir0=[DESC])\n EnumerableAggregate(group=[{0}], grand_total=[SUM($2)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#584:LogicalAggregate.NONE.[](input=RelSubset#583,group={0, 2},agg#0=AVG($1)), SORT->[0]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f3\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"$f2\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[FILTER-><>($1, 'OTHER'), AGGREGATION->rel#798:LogicalAggregate.NONE.[](input=RelSubset#797,group={0, 2},agg#0=AVG($1)), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"exists\":{\"field\":\"host\",\"boost\":1.0}}],\"must_not\":[{\"term\":{\"host.keyword\":{\"value\":\"OTHER\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"host\":{\"terms\":{\"field\":\"host.keyword\",\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"asc\"}}},{\"$f3\":{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"$f2\":{\"avg\":{\"field\":\"cpu_usage\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" +``` + +### Performance with Big5 Testing +``` +source=big5 | head | timechart span=1h count() +``` +- head 10: Avg ≈ 38 ms, P90 ≈ 43 ms +(span=1h, 1m, 1s, avg metrics.size, count by cloud.region) +- head 100: Avg ≈ 59 ms, P90 ≈ 69 ms +(span=1h, 1m, 1s and count by cloud.region) +- head 1000: Avg ≈ 74 ms, P90 ≈ 97 ms +(includes spans 1s, 1m, 1h count queries) +- head 10000: Avg: 310 ms + +``` 
+source=big5 | head | timechart span=1h count() by cloud.region +``` +Head 10: 64 ms +Head 100: 75 ms +Head 1 000: 113 ms +Head 10 000: Avg = 628 ms (p90 ≈ 630 ms) + +For both queries: +- Over head 10000 will hit Circuitbreaker. Tested 20K - 100K each run took roughly 650 to 820 ms before failing due to +``` +Data too large, data for [_id] would be [433994215/413.8mb], which is larger than the limit of [429496729/409.5mb]]] +``` +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +Non-blocking comments + + +## Review Comments + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:None` + + +This can be generalized to bin option expression? I assume in future this could be span, bins etc. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:None` + + +I think constructor can only take required arguments, e.g., `new TimeChart(aggFunc).span(...).by(...)` + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:26` + + +This should be a list? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +I think we can simplify this like `relevanceArg`? Also we can consider extracting common parameters like bin options so timechart and bin command can share. + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java:None` + + +I feel this is too much to assert. Probably we can make test data smaller? 
+Same for examples in `timechart.rst ` + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Just curious what the explain output looks like? I'm thinking showing the explain of second query plan is more useful? + + +### @dai-chen on `plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java:None` + + +I think we should avoid touching this class as well as `PPLQueryRequest`. Need to deep dive into your dynamic pivot executor to see where is the best place to implement this two phase execution. + + +### @selsong on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:26` + + +Currently, this PR only supports a single aggregation function instead of a list that handles multiple for P0 functionality. I can add multiple aggregation function support in later PR. + + +### @selsong on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java:None` + + +Thanks! Agreed, made test data smaller. events_many_hosts.json still has 11 hosts since it is the minimum to test default limit=10. Updated IT and rst doc too. + + +### @selsong on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +I added explain_timechart.json to show the explain output for logical and physical plan. I updated PR description to show that current approach is OpenSearch native aggregation then post-processing pivot in TimechartResponseFormatter, so there is no 2nd query plan. The explain output only includes aggregation pushdown. 
+ +The OpenSearch single query is: +``` +{ + "aggs": { + "composite": { + "sources": [ + {"time_bucket": {"date_histogram": {"field": "timestamp", "interval": "1m"}}}, + {"host": {"terms": {"field": "host.keyword"}}} + ] + }, + "aggs": {"avg_cpu": {"avg": {"field": "cpu_usage"}}} + } +} +``` +This single query returns: +``` +{ + "buckets": [ + {"key": {"time_bucket": "2024-01-01T00:00:00", "host": "web-01"}, "avg_cpu": {"value": 45.2}}, + {"key": {"time_bucket": "2024-01-01T00:00:00", "host": "db-01"}, "avg_cpu": {"value": 78.1}}, + {"key": {"time_bucket": "2024-01-01T00:01:00", "host": "web-01"}, "avg_cpu": {"value": 52.3}} + ] +} +``` + + +### @ykmr1224 on `protocol/src/main/java/org/opensearch/sql/protocol/response/format/TimechartResponseFormatter.java:None` + + +Why do we need specific formatter for Timechart? Isn't it just a subset of Json? + + +### @ykmr1224 on `plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java:None` + + +nit: I think we don't need to nest the if block. (let's just do like `} else if (pplRequest... ) {`) + + +### @ykmr1224 on `plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java:None` + + +Would it mis-judge when query somehow contains string `timechart`? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java:None` + + +This looks weird to me. Should we reuse `this` and just set span? Same for the following methods. +It might be better using `@Builder` lombok annotation. + + +### @Swiddis on `protocol/src/main/java/org/opensearch/sql/protocol/response/format/TimechartResponseFormatter.java:None` + + +This is a long function (136 lines), consider breaking it into simpler components next time. Helps with testing + reasoning through how it works. 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Should we add constants (might be enum) for "timeField", "byField", "valueField", "limit", and "useOther"? + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1410` + + +This method's also pretty long & it's hard to parse what the high-level approach is. Effectively need to read all lines to understand what it's doing. + +I'm a big fan of [Ousterhout's commenting notes](https://web.stanford.edu/~ouster/cgi-bin/cs190-spring16/lecture.php?topic=comments). + + +### @selsong on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1410` + + +Thanks, added high level comment. + + +### @selsong on `protocol/src/main/java/org/opensearch/sql/protocol/response/format/TimechartResponseFormatter.java:None` + + +Thanks, refactored into simpler components. + + +### @dai-chen on `plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java:None` + + +Since there is follow-up work, I wonder if we can avoid such changes. Can we simply assume the second column is always the one to pivot? I think we can merge this for now if we can avoid touching these PPL classes. + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:31` + + +Doc does not format correctly. The Default should be on a new line? https://github.com/selsong/sql/blob/f3d987f46ec42189b1484abf3a56a61cb60ab500/docs/user/ppl/cmd/timechart.rst + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +Why restrict on position? Is there an implementation limitation? + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +We only support 5?, instead of all [stats function](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/stats.rst#aggregation-functions). + + +### @selsong on `plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java:None` + + +Thanks. 
You're right, I think the check can be more robust. I added more robust detection. + + +### @selsong on `protocol/src/main/java/org/opensearch/sql/protocol/response/format/TimechartResponseFormatter.java:None` + + +The TimechartResponseFormatter transforms data from [timestamp, field, value] format to **pivot table format** [timestamp, field1_value, field2_value, ...] for visualization compatibility. It handles timechart-specific operations like data pivoting, limit enforcement with "OTHER" grouping, and score-based column sorting that the standard JSON formatter doesn't support. This is approach 3, chosen for now, with more discussion of the Dynamic Pivot Implementation Approach in RFC #3965. + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +Why is the column name $f2? It should be `@timestamp`? + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +Add an explanation of `most distinct`. + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +count result should be 0, instead of null? + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +What is the result? + + +### @penghuo on `docs/user/ppl/cmd/timechart.rst:None` + + +This query will display the top 2 hosts with the highest CPU usage values -> +This query will display the top 2 hosts with the highest sum of avg CPU usage values? + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3991: [Backport 2.19-dev] Support `reverse` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3991 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-06T16:35:49Z + +**State:** MERGED + +**Merged:** 2025-08-07T19:22:02Z + +**Changes:** +541 -0 (13 files) + + +## Description + +Backport c05a58c3e6f8e469d42a4830a42ee8413aa2e5d2 from #3867. + + + +## Reviews + + +### @dai-chen - APPROVED + + +Could you double check the CI failure? 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @dai-chen + + +Retrying CI: + +``` +2025-08-06T17:36:43.0076460Z Tests with failures: +2025-08-06T17:36:43.0154820Z - org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT.testQ7 +``` + + +--- + +# PR #3989: [PPL] Patterns command add UUID regex into log template parsing + +**URL:** https://github.com/opensearch-project/sql/pull/3989 + +**Author:** @Hailong-am + +**Created:** 2025-08-06T02:55:48Z + +**State:** MERGED + +**Merged:** 2025-08-21T09:10:46Z + +**Changes:** +38 -0 (3 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description + +Add UUID regex into template parsing + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +Any IT for this change? And could you also update user doc patterns.rst. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Hailong-am + + +@songkant-aws can you help to review? + + +### @songkant-aws + + +@Hailong-am LGTM. Please also make sure you verified the expected result on your test log data. + +@qianheng-aws @LantaoJin Could you help approve and merge it? + + +### @songkant-aws + + +Minor: spotless check failed + + +### @Hailong-am + + +> Minor: spotless check failed + +fixed + + +### @Hailong-am + + +> Any IT for this change? And could you also update user doc patterns.rst. + +IT added. For the doc `patterns.rst`, it's an internal logic update; no new parameter was added. 
+ + +--- + +# PR #3986: Add sum and avg functions in eval + +**URL:** https://github.com/opensearch-project/sql/pull/3986 + +**Author:** @vamsimanohar + +**Created:** 2025-08-06T00:10:25Z + +**State:** MERGED + +**Merged:** 2025-08-18T23:47:35Z + +**Changes:** +596 -6 (6 files) + +**Labels:** `enhancement`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +The current PR adds avg, sum functions to eval and where commands. These are different from sum and avg aggregate functions which works across rows but these functions work + +### Related Issues +Resolves #3998 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +I’m wondering if we evaluated other implementation options. If I understand correctly, SUM(a, b, c) could be rewritten as an arithmetic expression, for example: `eval X = a + b + c`. The same approach also works for `AVG`? + + +### @Swiddis - CHANGES_REQUESTED + + +One minor change/concern, otherwise LGTM + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +### @Swiddis on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +We're only doing recursion on one side here for the list, is that not going to cause issues for larger lists? Historically we've had to rewrite these to produce balanced trees. See e.g. https://github.com/opensearch-project/sql/pull/3660 + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + ++1 + + +### @vamsimanohar on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:None` + + +sure will change. 
+ +Note: +If someone writes a large expression without using sum, e.g. 1 + 2 + 3....., +ANTLR creates a left-leaning tree anyway, which requires a more involved change in the grammar to optimize. + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java:279` + + +np: I assume we will add more functions like `min`, `max`, please see how to make this more maintainable. Maybe a generic reduce method like https://spark.apache.org/docs/latest/api/sql/index.html#reduce? + + +## General Comments + + +### @vamsimanohar + + +> I’m wondering if we evaluated other implementation options. If I understand correctly, SUM(a, b, c) could be rewritten as an arithmetic expression, for example: `eval X = a + b + c`. The same approach also works for `AVG`? + +Makes sense. Let me take another look at the PR. + + +--- + +# PR #3985: [AUTO] Add release notes for 3.2.0 + +**URL:** https://github.com/opensearch-project/sql/pull/3985 + +**Author:** @opensearch-ci-bot + +**Created:** 2025-08-05T20:25:38Z + +**State:** MERGED + +**Merged:** 2025-08-05T21:54:54Z + +**Changes:** +54 -0 (1 files) + +**Labels:** `documentation` + + +## Description + +Add release notes for 3.2.0 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @dai-chen + + +Enforce PR label CI is failing on PR 3985. 
+ + +--- + +# PR #3983: ResourceMonitor only checks the memory health by calculating the memory usage after GC of old gen in v3 + +**URL:** https://github.com/opensearch-project/sql/pull/3983 + +**Author:** @LantaoJin + +**Created:** 2025-08-05T10:35:37Z + +**State:** MERGED + +**Merged:** 2025-08-22T03:24:17Z + +**Changes:** +371 -81 (20 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `performance`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +In v2, `ResourceMonitor` gets the memory usage by using total memory - free memory of runtime. +But in v3, Calcite will produce lots of **recyclable** memory garbage during its planning and implementation process. For Clickbench [q30](https://github.com/opensearch-project/sql/blob/main/integ-test/src/test/resources/clickbench/queries/q30.ppl), it produces around 70 mb in optimizing + 30 mb in implementation on local test, which can all be GC, ref https://github.com/opensearch-project/sql/pull/3971#discussion_r2251784085 + +This PR changes the behaviour of `ResourceMonitor` for v3: +ResourceMonitor only checks the memory health by calculating the memory usage after GC of old gen. + +### Related Issues +Also can improve/resolve the issue of #3750 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +I just want to understand the idea more deeply: the assumption is that most objects in Young Gen—such as Calcite’s internal structures—are short-lived, meaning they die after GC and do not survive into the Old Gen, right? 
As a result, the memory usage calculated from this approach reflects a lower bound of the actual memory pressure? + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:119` + + +To align with behaviour of v2, OOM error will fail the whole JVM. We caught OOM error in previous implementation. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/system/OpenSearchSystemIndexEnumerator.java:47` + + +Healthy checking is missed in access OpenSearchSystemIndex. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +it should be debug log. + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/MemoryUsage.java:13` + + +Could you add missing javadoc? Meanwhile currently the API looks more like a data model class? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +What will happen if this is no full GC happened? We fall back to RuntimeMemoryUsage? +Also any significant delay on usage update if full GC is not frequent? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java:None` + + +Any concern of using GCedMemoryUsage as default, and deprecated RuntimeMemoryUsage? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/system/OpenSearchSystemIndexEnumerator.java:47` + + +I think we discussed this previously. Can we do this in non-intrusive way? Maybe AOP/wrap on operator level? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +Not only full GCs are listened, but also concurrent GCs are notified, the name could be `NonYoungGcListener` or `OldGenGCListener`? + +PS: CGC is quite frequent in G1 since it's not stop-the-world. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java:None` + + +No technical concern; I just don't want to introduce any breaking experience for v2. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/MemoryUsage.java:13` + + +Sure. But `Record` class is implicitly final and cannot be extended. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:119` + + +I think it's OK to stay aligned with v2. But will it be better to trigger GC and then check memory usage again before making the service crash? Anyway, the primary purpose of this PR should not be blocked by this. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/system/OpenSearchSystemIndexEnumerator.java:47` + + +> I think we discussed this previously. Can we do this in non-intrusive way? Maybe AOP/wrap on operator level? + +Yes, technically we could wrap it on operator level. But it will introduce +1. A new logical node `LogicalMonitor` +2. A `ConverterRule` to convert a logical node to physical node +3. A new physical node such as `EnumerableMonitor` similar to `ResourceMonitorPlan` in v2. +4. Optimization #3853 has to find another workaround because the `root.rel` (physical plan) won't be a `Scannable` any more. + +I'm not quite sure it's still worth doing it the AOP way now. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +deleted. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/MemoryUsage.java:13` + + +javadoc added. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +renamed to `OldGenGCListener` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:61` + + +Since we don't really handle notification for young GC, why do we add listener to all NotificationEmitter? + +I think the current implementation will go into `handleNotification` very frequency but then fast return, since young GC happens more frequent than others. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +[question]Comparing to GC action, will `getGcName` be better here? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +Shall we add the memory usage of other spaces as well instead of only old gen? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +I see. So this means we also make assumption on GC type? Or this is applied to other popular collectors too? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +no. `gcName` deeply depends on the GC type (G1、ZGC、Parallel GC) the name could be totally different for old gen garbage collection. `gcAction` is more stable. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +I will rename `oldGenUsage` to `mem` to avoid confusion. This size is not the memory size of old gen. Its the entire memory usage after old gen GC. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:61` + + +Ok, will do a refactor to remove the notification for young. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +done + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:61` + + +done + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/GCedMemoryUsage.java:None` + + +> Or this is applied to other popular collectors too? + +Yes. The current code considered popular collectors such as G1, CMS, ZGC. But not all collectors in industry + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:119` + + +> But will it be better to trigger GC and then check memory usage again before making the service crash? + +Maybe more discussion is required and track in a follow-up issue. + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/system/OpenSearchSystemIndexEnumerator.java:47` + + +Okay, probably in future we need this for other operators that possibly generate more records, such as cross/lateral join, right? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java:None` + + +fallback to RuntimeMemoryUsage will not trigger; A runtime exception thrown during static field initialization is wrapped in ExceptionInInitializerError. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java:None` + + +fixed + + +## General Comments + + +### @LantaoJin + + +> I just want to understand the idea more deeply: the assumption is that most objects in Young Gen—such as Calcite’s internal structures—are short-lived, meaning they die after GC and do not survive into the Old Gen, right? As a result, the memory usage calculated from this approach reflects a lower bound of the actual memory pressure? + +Not actually, the objects can survive into the Old Gen but can be easily GCed by concurrent GC (not YGC). 
+ + +### @dai-chen + + +> > I just want to understand the idea more deeply: the assumption is that most objects in Young Gen—such as Calcite’s internal structures—are short-lived, meaning they die after GC and do not survive into the Old Gen, right? As a result, the memory usage calculated from this approach reflects a lower bound of the actual memory pressure? +> +> Not actually, the objects can survive into the Old Gen but can be easily GCed by concurrent GC (not YGC). + +Could you clarify the following points: + +1. When we say "can be easily GCed," are we assuming that all objects in the Young Generation are short-lived (recyclable)? That's why I was asking whether this new calculation represents a lower bound for total memory usage. +2. In the description, we mentioned 70MB + 30MB—is that memory usage per query? If it's total, is it a major concern? +3. Have we rerun the benchmark with these new changes? + + + +### @LantaoJin + + +> > > I just want to understand the idea more deeply: the assumption is that most objects in Young Gen—such as Calcite’s internal structures—are short-lived, meaning they die after GC and do not survive into the Old Gen, right? As a result, the memory usage calculated from this approach reflects a lower bound of the actual memory pressure? +> > +> > +> > Not actually, the objects can survive into the Old Gen but can be easily GCed by concurrent GC (not YGC). +> +> Could you clarify the following points: +> +> 1. When we say "can be easily GCed," are we assuming that all objects in the Young Generation are short-lived (recyclable)? That's why I was asking whether this new calculation represents a lower bound for total memory usage. +> 2. In the description, we mentioned 70MB + 30MB—is that memory usage per query? If it's total, is it a major concern? +> 3. Have we rerun the benchmark with these new changes? + +1. No, **"can be easily GCed"** means the object can be GCed by Young GC or Concurrent GC. 
The PR doesn't care about how long an object can stay in Young Gen, instead, as long as the objects can be GCed in Young GC or Concurrent GC(Old Gen), in another word, "easy" to trigger and "easy" to execute (no STW). Again, YGC and CGC are all work in CMS and G1, in ZGC, there is no Young/Old Gen, but all GC are concurrent. So the latest code should work as well. +2. "70MB + 30MB—is that memory usage per query " specified the Q30 of ClickBench query. It's just a user case. The major concern is after introducing Calcite, the memory usage could be much higher than v2 in optimizer stage. For a complex query (actually the Q30 is not complex enough), it may consume much higher memory than v2 since the Cascades style optimizer of Calcite really consumes memory to build equivalence set. But they should be easily GCed in YGC or CGC too. The old resource monitor may easy to trigger fast fail in v3. +3. Not yet. We can do it. But it may be no problem in benchmarking since the memory setting of EC2 is GB level. The failure is trigger easily in local test (laptop or Github CI). + + +### @LantaoJin + + +The IT could be still flaky in GitHub CI sometimes. But I cannot reproduce the resource limit issue in local with current implementation. But without this patch, resource limit issue fails easily in CalcitePPLClickBenchIT.test. + + +### @LantaoJin + + +> The IT could be still flaky in GitHub CI sometimes. But I cannot reproduce the resource limit issue in local with current implementation. But without this patch, resource limit issue fails easily in CalcitePPLClickBenchIT.test. + +Not see it again in latest two runs of CI. @dai-chen @penghuo can you check this PR again? 
+ + +--- + +# PR #3982: Fix DOUBLE to STRING cast rendering zero values in scientific notation + +**URL:** https://github.com/opensearch-project/sql/pull/3982 + +**Author:** @yuancu + +**Created:** 2025-08-05T09:36:42Z + +**State:** MERGED + +**Merged:** 2025-08-11T02:54:55Z + +**Changes:** +98 -4 (4 files) + +**Labels:** `bug`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + +When calcite is enabled, casting a double (not decimal) of value `0.0` to string will result in `0E0`. The root cause is analyzed [here](https://github.com/opensearch-project/sql/issues/3947#issuecomment-3153922774). + +This PR fixed the issue by replacing the cast call to a `NUMBER_TO_STRING` call. + +The logical plan of query `source=opensearch-sql_test_index_account | head 1 | eval d= cast(0 as double) | eval s=cast(d as string) | fields s` is now: + +``` +LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(s=[NUMBER_TO_STRING(0.0E0:DOUBLE)]) + LogicalSort(fetch=[1]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) +``` + +Additionally, decimals are also cast with `NUMBER_TO_STRING`. Decimals like `0.xxx` were cast to string as `.xxx` before this PR. Now they are represented as `0.xxx` + +### Related Issues +Resolves #3947 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +Does Calcite SQL have the same bug? 
+ + +## Review Comments + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/NumberToStringFunction.java:48` + + +``` +Expressions.call(Primitive.ofBox(operand.getType()).getBoxClass(), "toString", operand) +``` +would be a little better since it doesn't need to create a boxed object for non-nullable fields. But the performance difference should be very trivial + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/NumberToStringFunction.java:48` + + +I agree. Updated the implementation. + + +## General Comments + + +### @yuancu + + +@dai-chen Yes + +``` +SELECT CAST(CAST(0.0 AS DOUBLE) AS VARCHAR(50)); ++--------+ +| EXPR$0 | ++--------+ +| 0E0 | ++--------+ +``` + +But I think it's a convention rather than bug. + + +### @qianheng-aws + + +> This PR fixed the issue by replacing the cast call to a FORMAT_NUMBER call with the decimal pattern: 0.0###############. + +Why not just using ` Expressions.call( + fromPrimitive.boxClass, + "toString", + operand)` directly? + + + I think the main conflict is Calcite has decimal format for 0 of double/float type, while we don't want it. + + + + + +### @LantaoJin + + +> LogicalProject(s=[FORMAT_NUMBER(0.0E0:DOUBLE, '0.0###############')]) + +-1 + +@yuancu you try the suggestion of https://github.com/opensearch-project/sql/pull/3982#issuecomment-3157521012? + + +### @yuancu + + +@qianheng-aws @LantaoJin Thanks for pointing out. That will be much more straightforward. + +Initially I wanted to avoid creating custom operators by reusing `SqlLibraryOperators.FormatNumber`. Yet now that I did create a new custom operator, leveraging `number.toString()` will be more straightforward. + + +### @LantaoJin + + +@yuancu could you fix the decimal-to-string issue too? https://github.com/opensearch-project/sql/issues/3947#issuecomment-3157751366 + + +### @yuancu + + +Fixed decimal casting. 
Reverted to `Expressions.call(Expressions.box(operand), "toString")` for a simpler and unified implementation. The performance degradation should be negligible. + + +### @dai-chen + + +So our current approach is to add UDF for any conversion not supported by Calcite built-in CAST function, right? I'm thinking shall we just have one enhanced CAST function impl instead of adding UDF per conversion. Any thoughts? + + + +### @yuancu + + +Yes, our current approach is to reuse Calcite's built-in cast as much as possible until its behavior does not match expectation. It's feasible to have an enhanced CAST UDF to handle all cast. + +Downside: +- throw away what's already implemented in `RexBuilder.makeCast` +- cannot directly reuse existing UDFs (E.g. DATE, TIME, TIMESTAMP) since you can not call another operator inside CAST UDF + +Upside: +- Cleaner and more unified cast + + + +### @LantaoJin + + +> So our current approach is to add UDF for any conversion not supported by Calcite built-in CAST function, right? I'm thinking shall we just have one enhanced CAST function impl instead of adding UDF per conversion. Any thoughts? + +we could have a rule to remove this UDF in plan for Spark (if the table is spark table, or a flag, enable the rule of `RemovingNumberToStringUDF`. + + +--- + +# PR #3979: [Backport 2.19-dev] Update the maven snapshot publish endpoint and credential + +**URL:** https://github.com/opensearch-project/sql/pull/3979 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-05T03:10:35Z + +**State:** MERGED + +**Merged:** 2025-08-07T02:36:35Z + +**Changes:** +5 -2 (3 files) + + +## Description + +Backport 7ccdcd1f18ba846410dacddf415d8f7fd8dccd95 from #3806. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3977: [Backport 2.19-dev] Disable a failed PPL query fallback to v2 by default (#3952) + +**URL:** https://github.com/opensearch-project/sql/pull/3977 + +**Author:** @LantaoJin + +**Created:** 2025-08-05T02:59:58Z + +**State:** MERGED + +**Merged:** 2025-08-05T16:05:41Z + +**Changes:** +14 -125 (103 files) + + +## Description + +(cherry picked from #3952 commit 7892b90bb5fe80bbeb60a3200d94177dfe20d839) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3976: [Backport 2.19-dev] Increase the precision of sum return type + +**URL:** https://github.com/opensearch-project/sql/pull/3976 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-08-04T23:58:59Z + +**State:** MERGED + +**Merged:** 2025-08-05T02:45:30Z + +**Changes:** +80 -3 (3 files) + + +## Description + +Backport db2a8bfaaabb0594d5d7e6565a2e95b4a0d29e26 from #3974. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3974: Increase the precision of sum return type + +**URL:** https://github.com/opensearch-project/sql/pull/3974 + +**Author:** @qianheng-aws + +**Created:** 2025-08-04T10:19:26Z + +**State:** MERGED + +**Merged:** 2025-08-04T23:58:45Z + +**Changes:** +80 -3 (3 files) + +**Labels:** `bug`, `PPL`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Increase the precision of sum return type + +TINYINT, SMALLINT, INTEGER, BIGINT -> BIGINT + +FLOAT, REAL, DOUBLE -> DOUBLE + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3958 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3972: Support pushdown dedup with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3972 + +**Author:** @LantaoJin + +**Created:** 2025-08-04T09:47:27Z + +**State:** MERGED + +**Merged:** 2025-08-20T03:57:57Z + +**Changes:** +453 -54 (23 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Support pushdown dedup with Calcite, if all conditions matched: +- number of retaining events no larger than 1 +- dedup field list only contain one field +- the dedup field is one of field of index +- the dedup field type is keyword (or contain keyword subfield) or numeric +- the dedup option `keepempty=false` or missing + +PPL query `source=account | fields account_number, gender, age | dedup gender` +Before: +``` +LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, account]]) +``` +After: +``` +CalciteEnumerableIndexScan(table=[[OpenSearch, account]], PushDownContext=[ +[PROJECT->[account_number, gender, age], FILTER->IS NOT NULL($1), COLLAPSE->gender, LIMIT->10000] + +{ + "from": 0, + "size": 10000, + "timeout": "1m", + "query": { + "exists": { + "field": "gender", + "boost": 1.0 + } + }, + "_source": { + "includes": ["account_number", "gender", "age"], + "excludes": [] + }, + "collapse": { + "field": "gender.keyword" + } +} +``` + +Besides, I open 
https://github.com/opensearch-project/sql/issues/3973 as followup to support complex `dedup` cases. + +### Related Issues +Resolves #3913 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:188` + + +what is default sort order of opensearch collapse if no sort-key? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:169` + + +why number type is not supported? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:188` + + +tie-break logic in OpenSearch. shard_id, and _doc. +documenting this behavior in the deduplication logic to clarify how OpenSearch handles tie-breaking by default. + + /** + * if we need to tie-break since score / sort value are the same we first compare shard index (lower shard wins) + * and then iff shard index is the same we use the hit index. 
+ */ + static boolean tieBreakLessThan(ShardRef first, ScoreDoc firstDoc, ShardRef second, ScoreDoc secondDoc) { + final int firstShardIndex = first.getShardIndex(firstDoc); + final int secondShardIndex = second.getShardIndex(secondDoc); + // Tie break: earlier shard wins + if (firstShardIndex < secondShardIndex) { + return true; + } else if (firstShardIndex > secondShardIndex) { + return false; + } else { + // Tie break in same shard: resolve however the + // shard had resolved it: + assert first.hitIndex != second.hitIndex; + return first.hitIndex < second.hitIndex; + } + } + + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:169` + + +only supports keywords and numeric types. There is a `!` before the condition. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:188` + + +> what is default sort order of opensearch collapse if no sort-key? + +The default sort order is added by other logic, for example `_doc`. I will check the sort behavior of dedup in v2 and spl. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +Seems we don't need to multiply rowCount by estimateRowCountFactor 2 times. There is such operation below already. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +Why not adding a more precise predicator here? It only checks the operation is `<=` but not checking literal is 1 until `onMatch`. However, we have chance to skip the `onMatch` earlier here, then it has no needs to do predication for its children. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +Seems not making sense to find the literal deeply. e.g. 
We don't want to do optimization for the filter `$? + 1 <= 1`, the only case we want to match is `$? <= 1`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:51` + + +Shall we check the size of `windows` is exactly 1? Or maybe do such check earlier in the predicator. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +We also need to check the field in the condition is the `_row_number_dedup` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +It's on purpose, I cannot figure out the selectivity such as FILTER does. So multiply the estimateRowCountFactor twice. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +> We also need to check the field in the condition is the _row_number_dedup + +Checked in rule config. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:51` + + +size of windows is exactly 1 for _row_number_dedup_. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +For further optimization, the number could be `> 1` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:300` + + +use Programs.standard() in building context because the actual execution uses it. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java:57` + + +fix flaky + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +As discussed offline, we'd better prevent going into `match` as we could as possible. 
So we need to put `validFilter ` here and it should only be changed like current code or update `validFilter` until in that PR for supporting `>1`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexScanRule.java:None` + + +Are these 2 functions used anywhere? + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:None` + + +Seems never used + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchDedupPushdownRule.java:None` + + +> As discussed offline, we'd better prevent going into `match` as we could as possible. So we need to put `validFilter ` here and it should only be changed like current code or update `validFilter` until in that PR for supporting `>1`. + +done + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3971: SUM aggregation enhancement on operations with literal + +**URL:** https://github.com/opensearch-project/sql/pull/3971 + +**Author:** @qianheng-aws + +**Created:** 2025-08-04T09:23:56Z + +**State:** MERGED + +**Merged:** 2025-08-21T20:23:36Z + +**Changes:** +561 -5 (11 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +SUM aggregation enhancement on operations with literal + + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3967 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java:None` + + +Calcite will produce lots of recyclable memory garbage during its planning and implementation process. For q30, it produces around 70 mb in optimizing + 30 mb in implementation on my local test, which can all be GC. + +So I think, for Calcite, the ResourceMonitor's memory health check is unpredictable and less reasonable. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:145` + + +Can we add comments before and after code from calcite + +// START CALCITE +... +your code +// END CALCITE + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:159` + + +2.2f magic number, can it be changed in the future? + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java:None` + + +ResourceMonitor protects physical operators, how is it related to Calcite optimizing/implementation logic? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java:None` + + +Add UT + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java:None` + + +format java doc. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java:None` + + +SUM(FIELD / NUMBER) -> SUM(FIELD) / NUMBER, does it work for float/double? + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java:None` + + +https://github.com/opensearch-project/sql/blob/7892b90bb5fe80bbeb60a3200d94177dfe20d839/opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java#L53-L58 + +ResourceMonitor gets the memory usage by using total memory - free memory.
While the whole process of a query(even other queries previous to the current one) will accumulate the memory usage until GC. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:159` + + +Usually it doesn't need any change unless we met some edge case(like the final plan doesn't satisfy our expectation) where I need refactor it. + +Similar to the factor we add in filter script push down: https://github.com/opensearch-project/sql/blob/7892b90bb5fe80bbeb60a3200d94177dfe20d839/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java#L128 + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java:None` + + +No, they also have precision issue, e.g: + +assuming `SUM(a / 0.2)` and the table has a = 0.1 for its all 3 rows. The execution could be present: + +0.1 / 0.2 * 3 = 1.5 + +0.1 * 3 / 0.2 = 1.5000000000000002 + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java:None` + + +I've submitted a PR #3983 to fix the issue + + +## General Comments + + +### @qianheng-aws + + +@penghuo Please take another look. + + +--- + +# PR #3970: `fields` Command Enhancement - Advanced Field Selection Features (Calcite) + +**URL:** https://github.com/opensearch-project/sql/pull/3970 + +**Author:** @aalva500-prog + +**Created:** 2025-08-02T09:21:18Z + +**State:** MERGED + +**Merged:** 2025-08-20T19:16:58Z + +**Changes:** +2304 -115 (18 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +# PPL `fields` Command Enhancement (Calcite) + +## Description + +This PR implements enhancement to the already existing PPL `fields` command with advanced field selection capabilities when Calcite is enabled. 
The enhancement adds five key features to improve usability and provide flexible syntax options for field selection in PPL queries. + +### Features Implemented + +**✅ Feature 1: Space-delimited field syntax** +- Added support for space-separated fields in addition to comma-separated syntax +- Example: `source=accounts | fields firstname lastname age` + +**✅ Feature 2: Wildcard pattern matching** +- Implemented prefix, suffix, and contains wildcards for dynamic field selection +- Prefix: `source=accounts | fields account*` (matches account_number) +- Suffix: `source=accounts | fields *name` (matches firstname, lastname) +- Contains: `source=accounts | fields *a*` (matches all fields containing 'a') + +**✅ Feature 3: Field deduplication** +- Automatic removal of duplicate fields when wildcards expand to already specified fields +- Maintains clean result sets and optimal performance + +**✅ Feature 4: Mixed delimiter support** +- Support for both space and comma delimiters in the same command +- Example: `source=accounts | fields firstname lastname, balance` + +**✅ Feature 5: Table command alias** +- Created `table` command that functions identically to `fields` command +- Alternative syntax: `source=accounts | table firstname, lastname` +- All features (1-4) work identically with both `fields` and `table` syntax + +## Technical Implementation + +### Parser Changes +- Updated PPL grammar to support new field selection syntax patterns +- Added support for space-delimited and mixed delimiter field lists +- Implemented `table` command as parser-level alias generating identical `Project` AST structures + +### Core Enhancements +- Enhanced wildcard pattern matching with regex-based field resolution +- Implemented automatic field deduplication to prevent duplicate selections +- Added support for mixed delimiter parsing (space + comma combinations) + +### Cross-Engine Compatibility +- Full compatibility with both Calcite and non-Calcite execution engines + +## Usage Examples 
+ +```ppl +# Feature 1: Space-delimited syntax +source=accounts | fields firstname lastname age + +# Feature 2: Wildcard pattern matching +source=accounts | fields account* # Prefix matching +source=accounts | fields *name # Suffix matching +source=accounts | fields *a* # Contains matching + +# Feature 3: Field deduplication (automatic) +source=accounts | fields firstname, *name # firstname not duplicated + +# Feature 4: Mixed delimiter support +source=accounts | fields firstname lastname, balance + +# Feature 5: Table command alias - identical functionality +source=accounts | fields account_number, firstname, lastname +source=accounts | table account_number, firstname, lastname + +# All features work with both commands +source=accounts | fields firstname lastname, account* +source=accounts | table firstname lastname, account* +``` +## Testing + +### Unit Tests Added +- ✅ Wildcard pattern matching logic (prefix, suffix, contains, complex patterns) +- ✅ Field deduplication algorithms +- ✅ Space-delimited and mixed delimiter syntax parsing +- ✅ Cross-engine compatibility validation + +### Integration Tests Added +- ✅ End-to-end functionality for both `fields` and `table` commands +- ✅ Pipeline integration with other PPL commands +- ✅ Performance benchmarks comparing wildcard vs. 
explicit field selection +- ✅ Edge case testing with special characters and unusual field names + +### Test Coverage Requirements +- ✅ **All Five Features**: Tested for both `fields` and `table` commands +- ✅ **Cross-Engine Compatibility**: Both Calcite and non-Calcite engines validated +- ✅ **Regression Testing**: Ensures existing functionality remains intact +- ✅ **Performance Validation**: No significant degradation in query execution + +## Performance Impact + +- ✅ **No Performance Degradation**: Benchmarks show no significant impact on query execution +- ✅ **Optimized Pattern Matching**: Efficient regex-based wildcard resolution +- ✅ **Memory Efficient**: Automatic deduplication prevents unnecessary field processing + +## Backward Compatibility + +- ✅ **100% Backward Compatible**: All existing queries continue to work without modification +- ✅ **Additive Features**: New features are purely additive and don't break existing functionality +- ✅ **Graceful Error Handling**: Invalid patterns handled with clear error messages + +## Documentation + +- ✅ Updated PPL command documentation with new syntax examples +- ✅ Added comprehensive usage examples and best practices +- ✅ Updated grammar documentation for parser changes + +## Best Practices + +- Use wildcards to simplify queries but be mindful of pattern specificity to avoid selecting unintended fields +- Prefer more specific patterns (e.g., `user.*` instead of `u*`) to improve performance +- Consider using explicit field names for critical fields and wildcards for supplementary fields +- When using multiple wildcards, order them from most to least specific for better readability + +## Related Issues +Addresses [#3888](https://github.com/opensearch-project/sql/issues/3888) and [#3877](https://github.com/opensearch-project/sql/issues/3877) + +## 📊 BIG5 FIELDS PERFORMANCE TEST +#### Dataset: big5 (1.16 billion documents, 257GB) +#### Test Date: Thu Aug 7 2025 + +### Simple Baseline Performance summary +- head 10: Avg
= 151ms, P90 = 46ms (6 QPS) +- head 50: Avg = 42ms, P90 = 50ms (23 QPS) +- head 100: Avg = 39ms, P90 = 42ms (25 QPS) + +### Simple Table/Fields Command Performance Summary +- table 10 (2 fields): Avg = 25ms, P90 = 27ms (39 QPS) +- table 10 (wildcard *): Avg = 20ms, P90 = 23ms (48 QPS) +- table 10 (with where): Avg = 66ms, P90 = 155ms (15 QPS) +- fields 10 (2 fields): Avg = 25ms, P90 = 26ms (40 QPS) +- fields 10 (wildcard *): Avg = 20ms, P90 = 21ms (48 QPS) +- fields 10 (with where): Avg = 39ms, P90 = 52ms (25 QPS) + +### Large Dataset Baseline Performance Summary +- head 10K: Avg = 792ms, P90 = 885ms (1 QPS) +- head 50K: Avg = 719ms, P90 = 722ms (1 QPS) +- head 100K: Avg = 720ms, P90 = 722ms (1 QPS) + +### Table/Fields Large Dataset Performance Summary (>10K rows) +- table 10K rows: Avg = 195ms, P90 = 197ms +- table 50K rows: Avg = 196ms, P90 = 196ms +- table 10K rows: Avg = 196ms, P90 = 197ms +- fields 10K rows: Avg = 196ms, P90 = 197ms +- fields 50K rows: Avg = 196ms, P90 = 197ms +- fields 10K rows: Avg = 195ms, P90 = 196ms + +### BASELINE QUERIES +#### Basic head operations for baseline performance +- source=big5 | head 10; +- source=big5 | head 50; +- source=big5 | head 100; + +#### Large dataset baselines +- source=big5 | head 10000; +- source=big5 | head 50000; +- source=big5 | head 100000; + +### BASIC FIELD SELECTION +#### Single field selection +- source=big5 | table `@timestamp` | head 10; +- source=big5 | fields `@timestamp` | head 10; + +#### Two field selection +- source=big5 | table `@timestamp`, `metrics.size` | head 10; +- source=big5 | fields `@timestamp`, `metrics.size` | head 10; + +#### Three field selection +- source=big5 | table `@timestamp`, `metrics.size`, `metrics.tmin` | head 10; +- source=big5 | fields `@timestamp`, `metrics.size`, `metrics.tmin` | head 10; + +### WILDCARD FIELD SELECTION +#### Full wildcard selection +- source=big5 | table * | head 10; +- source=big5 | fields * | head 10; + +#### Prefix wildcard +- source=big5 | table 
metrics* | head 10; +- source=big5 | fields metrics* | head 10; + +#### Suffix wildcard +- source=big5 | table *size | head 10; +- source=big5 | fields *size | head 10; + +#### Mixed explicit and wildcard +- source=big5 | table `@timestamp`, metrics*, `host.name` | head 10; +- source=big5 | fields `@timestamp`, metrics*, `host.name` | head 10; + +### FIELD SELECTION WITH OPERATIONS +#### With WHERE clause +- source=big5 | where `metrics.size` > 1000 | table `@timestamp`, `metrics.size`, `host.name` | head 10; +- source=big5 | where `metrics.size` > 1000 | fields `@timestamp`, `metrics.size`, `host.name` | head 10; + +#### With complex pipeline +- source=big5 | where `metrics.size` > 1000 | sort `metrics.size` | table `@timestamp`, `metrics.size`, `host.name` | head 2; +- source=big5 | where `metrics.size` > 1000 | sort `metrics.size` | fields `@timestamp`, `metrics.size`, `host.name` | head 2; + +#### With EVAL operation +- source=big5 | eval size_kb = `metrics.size`/1024 | table `@timestamp`, size_kb | head 10; +- source=big5 | eval size_kb = `metrics.size`/1024 | fields `@timestamp`, size_kb | head 10; + +#### With STATS operation +- source=big5 | stats count() as cnt, avg(`metrics.size`) as avgSize by `host.name` | sort avgSize | table `host.name`, avgSize, cnt | head 10; +- source=big5 | stats count() as cnt, avg(`metrics.size`) as avgSize by `host.name` | sort avgSize | fields `host.name`, avgSize, cnt | head 10; + +### LARGE DATASET PERFORMANCE QUERIES +#### Large dataset field selection +- source=big5 | table `@timestamp`, `metrics.size` | head 10000; +- source=big5 | fields `@timestamp`, `metrics.size` | head 10000; +- source=big5 | table `@timestamp`, `metrics.size` | head 50000; +- source=big5 | fields `@timestamp`, `metrics.size` | head 50000; +- source=big5 | table `@timestamp`, `metrics.size` | head 100000; +- source=big5 | fields `@timestamp`, `metrics.size` | head 100000; + +#### Large dataset with wildcards +- source=big5 | table metrics* | head 
25000; +- source=big5 | fields metrics* | head 25000; + +### PIPELINE OPERATIONS +#### Mixed commands in pipeline +- source=big5 | table `@timestamp`, `metrics.size`, `host.name`, `message` | fields `@timestamp`, `metrics.size` | head 10; +- source=big5 | fields `@timestamp`, `metrics.size`, `host.name`, `message` | table `@timestamp`, `metrics.size` | head 10; + +#### Complex pipeline +- source=big5 | table `@timestamp`, `metrics.size`, `host.name`, `message`, `agent.name` | where `metrics.size` > 500 | fields `@timestamp`, `metrics.size`, `host.name` | sort `metrics.size` | table `@timestamp`, `metrics.size` | head 10; + +## Check List +- [✅] New functionality includes testing. +- [✅] New functionality has been documented. + - [✅] New functionality has javadoc added. + - [✅] New functionality has a user manual doc added. +- [] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [✅] Commits are signed per the DCO using `--signoff`. +- [✅] Public documentation issue/PR [created](https://github.com/opensearch-project/sql/issues/3888). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @noCharger - COMMENTED + + +``` +Large Dataset Baseline Performance +head 10K: Avg = 792ms, P90 = 885ms (1 QPS) +head 50K: Avg = 719ms, P90 = 722ms (1 QPS) +head 100K: Avg = 720ms, P90 = 722ms (1 QPS) +Table/Fields Large Dataset Performance (>10K rows) +table 10K rows: Avg = 195ms, P90 = 197ms +table 50K rows: Avg = 196ms, P90 = 196ms +table 10K rows: Avg = 196ms, P90 = 197ms +fields 10K rows: Avg = 196ms, P90 = 197ms +fields 50K rows: Avg = 196ms, P90 = 197ms +fields 10K rows: Avg = 195ms, P90 = 196ms +``` + +Could you share the query used for benchmarking? Also let's add them as a separate PR: +* https://github.com/opensearch-project/sql/tree/main/integ-test/src/test/resources/clickbench/queries +* https://github.com/opensearch-project/sql/tree/main/integ-test/src/test/resources/big5/queries + + + + + + +### @LantaoJin - COMMENTED + + +Can you add some anonymizer tests in `PPLQueryDataAnonymizerTest` + + +## Review Comments + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Thanks for the refactoring! + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +Let's try not to put too much comment especially for the private methods. We should try to make the code self explanatory without comment. Same for the following methods. +Good reference => https://bpoplauschi.github.io/2021/01/20/Clean-Code-Comments-by-Uncle-Bob-part-2.html + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +This would change the original processing order. Is it fine? + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/analysis/WildcardFieldResolver.java:None` + + +I was not sure why we need LinkedHashSet. Should we simply use HashSet? + + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/analysis/WildcardFieldResolverTest.java:30` + + +I think we want to have test cases for overlapping cases like `*a*, *name`.
+ + +### @ykmr1224 on `core/src/test/java/org/opensearch/sql/analysis/WildcardFieldResolverTest.java:None` + + +Optional: Maybe we can improve readability by extraction like: `testWildcard(ImmutableList.of("account*"), ImmutableList.of("account_number"))` + + +### @ykmr1224 on `docs/user/ppl/cmd/fields.rst:None` + + +QQ: Does it accept full wild card? `*` + + +### @ykmr1224 on `docs/user/ppl/cmd/fields.rst:None` + + +When explicitly specified as separate field, should we output duplicate fields? (Sometimes user wants duplicate fields for some reason) + + +### @aalva500-prog on `docs/user/ppl/cmd/fields.rst:None` + + +The current implementation doesn't support full wildcard selection in the `fields` and `table` commands directly, as in PPL you can select all fields like this: `source=accounts`. I can try to add this feature, though. Thanks for raising this point! + + +### @aalva500-prog on `docs/user/ppl/cmd/fields.rst:None` + + +We may need to confirm with the team. As we are trying to replicate SPL behavior. The thing is that in SPL, the `fields` and `table` commands deduplicate fields automatically, even when explicitly specified. On the other hand, in PPL I received an exception when specifying the same field names twice or more. + +Update: we should align with SPL behavior, so field deduplication in output should be the way to go. + + +### @aalva500-prog on `core/src/test/java/org/opensearch/sql/analysis/WildcardFieldResolverTest.java:30` + + +Sure, I can add more test cases here. + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/analysis/WildcardFieldResolver.java:None` + + +In this case, I'm using LinkedHashSet to preserve the order of fields already visited/seen. The `matchWildcardPattern` method already preserves "natural order" from available fields, imo, maintaining that order through the deduplication process makes sense. 
+ + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/analysis/Analyzer.java:None` + + +You are right, I will handle both cases as separate methods, and keep the original order. Thanks! + + +### @aalva500-prog on `core/src/test/java/org/opensearch/sql/analysis/WildcardFieldResolverTest.java:None` + + +Sure, I can implement the tests in this way, thanks! + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/analysis/WildcardFieldResolver.java:None` + + +It looks like the order stored in `seenFields` is not used. Does it worth maintaining it? + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/analysis/WildcardFieldResolver.java:None` + + +@ykmr1224 Took care of it, please review, thanks! + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Seems `wcFieldExpression` is used for other commands. Is it safe to add STAR here? Any concern for other commands using it? + + +### @ykmr1224 on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Is this needed? I think `wcMixedFieldList` would match purely space-separated field list as well. + + +### @dai-chen on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldsTest.java:None` + + +Can we deduplicate the code for each tests? The only difference for each is the test query? + + +### @dai-chen on `core/src/test/java/org/opensearch/sql/analysis/SelectAnalyzeTest.java:None` + + +Does this change for Feature #3 impact SQL behavior? I recall SQL standard allows duplicate fields? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Just to confirm is this expected behavior and documented in user manual? + + +### @aalva500-prog on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +You're absolutely right! The wcSpaceSeparatedFieldList rule is redundant because wcMixedFieldList already handles space-separated field lists. I'll take care of it, thanks! 
+ + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Can all these new expanding logic happen before existing logic? +e.g., `Project expanded = node.expand(currentFields)` or `expand(node, currentFields)`? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/analysis/WildcardFieldResolver.java:None` + + +Just want to confirm: is all new command or enhancement expected to work with/without Calcite? + + +### @aalva500-prog on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Yes, it is used for other commands. I can create a new rule only for the fields/table commands to avoid any issues. + + +### @aalva500-prog on `core/src/test/java/org/opensearch/sql/analysis/SelectAnalyzeTest.java:None` + + +It shouldn't impact SQL behavior. I have modified the logic in my implementation to verify/confirm that my changes only work with PPL commands and not with SQL queries. In this way, I don't have to modify any tests on this file. In this case, I have placed the file back to its original state. + + +### @aalva500-prog on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldsTest.java:None` + + +Sure, I can do that, np. I was just trying to confirm that the queries work the same way with `fields` and `table` command accordingly. Thanks for raising this point! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/analysis/WildcardFieldResolver.java:None` + + +Confirming with our team whether we should only expect the enhancements in the `fields` command to work both when Calcite is enabled and disabled. I'll update here once I get an answer, thanks! + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +@dai-chen, thanks for raising this point. If I understand correctly, I think you're asking how this is handled in SPL and if there's any documentation for it?
+ +If so, when there are no matches for the input field, SPL doesn't throw an exception; it just provides information from the internal fields. In SPL when using the `fields` command, by default, the internal fields `_raw` and `_time` are included in search results in Splunk Web. The fields command doesn't strip these internal fields unless you explicitly specify that they shouldn't appear in Splunk Web output [1]. + +On the other hand, in the current implementation of PPL in OpenSearch, if you try to select a field that doesn't exist you would get the following error: + +``` +"Fail to analyze query. Cause: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `XYZ` cannot be resolved. Did you mean one of the following +``` + +That said, since there's no SPL-like behavior in the current version of OpenSearch PPL, I'm throwing an exception whenever the user provides a query that doesn't match any available field. Please let me know if this makes sense or if you think the implementation should be handled differently. Perhaps we can consult with the team about displaying internal fields as in SPL. + +------------------ + +- Update 08/13/25: Our team has agreed to keep the existing behavior and provide the exception when there is no field match in the query, as currently implemented. + +#### References: +[1] https://help.splunk.com/en/splunk-enterprise/search/spl-search-reference/9.3/search-commands/fields + + +### @aalva500-prog on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +I have refactored the visitProject method, please review, thanks! + + +## General Comments + + +### @aalva500-prog + + +@noCharger I have added the queries I used for the Big5 performance testing in the PR description. I will create a new PR to add the query files accordingly. Thanks! + + +### @LantaoJin + + +Hi @aalva500-prog , can you backport this to 2.19-dev branch manually? 
+ + +--- + +# PR #3968: [Backport 2.19-dev] Support function argument coercion with Calcite (#3914) + +**URL:** https://github.com/opensearch-project/sql/pull/3968 + +**Author:** @yuancu + +**Created:** 2025-08-01T03:57:22Z + +**State:** MERGED + +**Merged:** 2025-08-06T03:13:12Z + +**Changes:** +713 -383 (39 files) + + +## Description + +## Description + +Backport #3914 to 2.19-dev + +## Commit Message + +* Change the use of SqlTypeFamily.STRING to SqlTypeFamily.CHARACTER as the string family contains binary, which is not expected for most functions + + + +* Implement basic argument type coercion at RelNode level + + + +* Conform type checkers with their definition in documentation +- string as an input is removed if it is not in the document +- string as an input is kept if it is in the document, even if it can be implicitly cast +- use PPLOperandTypes as much as possible + + + +* Implement type widening for comparator functions + +- Add COMPARATORS set to BuiltinFunctionName for identifying comparison operators +- Implement widenArguments method in CoercionUtils to find widest compatible type +- Apply type widening to comparator functions before applying type casting +- Add detailed JavaDoc to explain coercion methods + + + +* Update error messages of datetime functions with invalid args + + + +* Simplify datetime-string compare logic with implict coercion + + + +* Refactor resolve with coercion + + + +* Move down argument cast for reduce function + + + +* Merge comparators and their IP variants so that coercion works for IP comparison + +- when not merging, ip comparing will also pass the type checker of Calcite's comparators + + + +* Refactor ip comparator to comparator + + + +* Revert "Refactor ip comparator to comparator" + +This reverts commit c539056618cf9fdd6b5b75ee2d04a596fcdcfe3d. + + + +* Revert "Merge comparators and their IP variants so that coercion works for IP comparison" + +This reverts commit bd9f3bb8b663309d7f129b6441dda1f045187237. 
+ + + +* Rule out ip from built-in comparator via its type checker + + + +* Restrict CompareIP's parameter type + + + +* Revert to previous implementation of CompareIpFunction to temporarily fix ip comparison pushdown problems (udt not correctly serialized; ip comparison is not converted to range query) + + + +* Test argument coercion explain + + + +* Fix error msg in CalcitePPLFunctionTypeTest + + + +--------- + + +(cherry picked from commit 484f49ebaa44264b421f54225dc19e2623ae9f85) + +### Related Issues +#3761 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3966: [Backport 2.19-dev]Append limit operator for QUEERY_SIZE_LIMIT (#3940) + +**URL:** https://github.com/opensearch-project/sql/pull/3966 + +**Author:** @qianheng-aws + +**Created:** 2025-08-01T02:58:31Z + +**State:** MERGED + +**Merged:** 2025-08-04T01:51:35Z + +**Changes:** +348 -230 (105 files) + + +## Description + +* Append limit operator for QUEERY_SIZE_LIMIT + + + +* Add LogicalSystemLimit + + + +* Revert part of #3880 + + + +* Fix IT after merging main + + + +--------- + + +(cherry picked from commit 5316c0a8e985a6fc895cf0714e6d813e5b72c653) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3962: Add issue template specific for PPL commands and queries + +**URL:** https://github.com/opensearch-project/sql/pull/3962 + +**Author:** @anasalkouz + +**Created:** 2025-07-31T19:44:54Z + +**State:** MERGED + +**Merged:** 2025-08-03T18:51:20Z + +**Changes:** +71 -0 (1 files) + +**Labels:** `documentation`, `enhancement` + + +## Description + +### Description +In order to make it easier for users to report any specific PPL issue, this template will ask for more details like PPL query used and it will assign PPL label automatically + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +Thanks! + + +### @RyanL1997 - APPROVED + + +LGTM! Thanks. + + +## Review Comments + + +### @penghuo on `.github/ISSUE_TEMPLATE/ppl_bug_report.md:None` + + +Ask for index mapping and sample data explicitly. + + +### @RyanL1997 on `.github/ISSUE_TEMPLATE/ppl_bug_report.md:None` + + +agree, and maybe add another reminder for sanitizing the sensitive info/fields also. 
+ + +### @anasalkouz on `.github/ISSUE_TEMPLATE/ppl_bug_report.md:None` + + +Both comments have been addressed + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3960: [Backport 2.19-dev] Add missing command in index.rst (#3943) + +**URL:** https://github.com/opensearch-project/sql/pull/3960 + +**Author:** @penghuo + +**Created:** 2025-07-31T18:21:06Z + +**State:** MERGED + +**Merged:** 2025-08-04T16:39:04Z + +**Changes:** +52 -24 (3 files) + +**Assignees:** @penghuo + + +## Description + +(cherry picked from commit 0da93a38ab2cd51a825eb27b2c4dbf5ed49ae2a5) + +### Description +Backport https://github.com/opensearch-project/sql/pull/3943 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3959: Push down IP comparison as range query with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3959 + +**Author:** @yuancu + +**Created:** 2025-07-31T10:24:06Z + +**State:** MERGED + +**Merged:** 2025-08-18T08:34:31Z + +**Changes:** +153 -38 (11 files) + +**Labels:** `bug`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + +IP comparison was pushed down as a script. However, DSL supports range queries on IP fields like the following: +```json +{"range":{"host":{"from":"1.1.1.1","to":null}}} +``` + +This PR pushes down IP comparison to range queries like above.
+ +For query `source=weblogs | where '1.1.1.1' < host | fields host`: + +Before: + +```json +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(host=[$0])\n LogicalFilter(condition=[LESS_IP(IP('1.1.1.1':VARCHAR), $0)])\n CalciteLogicalIndexScan(table=[[OpenSearch, weblogs]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, weblogs]], PushDownContext=[[PROJECT->[host], SCRIPT->LESS_IP(IP('1.1.1.1':VARCHAR), $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAensKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJPVEhFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImhvc3QiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQDensKICAib3AiOiB7CiAgICAibmFtZSI6ICJMRVNTX0lQIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6ICIxLjEuMS4xIiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJPVEhFUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9LAogICAgICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgICAgICJkeW5hbWljIjogZmFsc2UKICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAo
gICJ0eXBlIjogewogICAgInR5cGUiOiAiQk9PTEVBTiIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAEaG9zdH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAAklQeHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":1754372230769968000}},\"boost\":1.0}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} +``` + +After: + +```json +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(host=[$0])\n LogicalFilter(condition=[LESS_IP(IP('1.1.1.1':VARCHAR), $0)])\n CalciteLogicalIndexScan(table=[[OpenSearch, weblogs]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, weblogs]], PushDownContext=[[PROJECT->[host], FILTER->LESS_IP(IP('1.1.1.1':VARCHAR), $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"1.1.1.1\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} +``` + +### Related Issues +Resolves #3937 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +can you add some doc in `withReverse()` to explain when should we use this API? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +And these changes are making the code harder to read and maintain. Can this `withReverse(lookupOperator("EQUALS_IP"))` be hidden? cc @ishaoxy as the `CompareIpFunction` author, do you understand what the new pattern code do? + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +Truly a little hard for me. orz + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +I created javadoc for this method. + +Why I added this method: + +1. `PredicateAnalyzer` relies on the `SqlKind` attribute of a comparator to convert comparison to DSL. E.g. if an operator has sql kind `SqlKind.GREATER_THAN_OR_EQUAL`, a DSL is created with `QueryExpression.create(pair.getKey()).gte(pair.getValue())` +2. To reuse this mechanism and process IP comparison in the same way, I override the `getKind` method of IP comparators with corresponding SQL kind. +3. When a `RelBuilder` normalizes the condition of a filter, it creates a hash code for the comparison operator. During this process, it will call `reverse()` of a `SqlOperator` to retrieve its reverse if it finds the comparison has to be normalized to a certain order. +4. This requires the `LESS_IP` UDF to have a reverse `GREATER_IP`. But they are defined sequentially, I have to find a way to assign their reverse later after all operators in `PPLBuiltinOperators` are instantiated. +5. That's where `withReverse` and `lookupOperator` come in. 
This delays the assignment of the reverse operator to the first time the `reverse()` of an IP comparison UDF is called -- by that time, all operators in `PPLBuiltinOperators` are already instantiated. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +Hiding it is a bit tricky for me. The reverse operator of a operator is defined at the same time with the operator itself in `PPLBuiltinOperators`. Therefore, looking it up will refer to `PPLBuiltinOperators`. If I hide `withReverse` by moving the lookup into `CompareIpFunction`, this will introduce circular import. + +However, I can make the reverse operator an argument to the constructor of an IP comparator: + +```java +public static final SqlOperator GREATER_IP = + CompareIpFunction.greater(lookupOperator("LESS_IP")).toUDF("GREATER_IP"); +``` + +And the `CompareIPFunction` class will be like: + +```java +public class CompareIpFunction extends ImplementorUDF { + private final SqlKind kind; + /** + * The reverse operator supplier that provides the reverse SQL operator + * + *

    This method is used to establish the reversed relationship between comparison operators + * (e.g., "less than" and "greater than"). When the query optimizer normalizes expressions, it may + * need to transform "b > a" to "a < b". + * + *

    E.g. in the {@code hashCode} method of {@link org.apache.calcite.rex.RexNormalize}#L115, it + * always converts B [comparator] A to A [reverse_comparator] B if the ordinal of + * the reverse of the comparator is smaller. + * + *

    IP comparison functions use this to inform the optimizer that ip_less_than is the reverse of + * ip_greater_than, allowing for proper query normalization. + */ + private Supplier reverse; + + private CompareIpFunction(SqlKind kind, Supplier reverse) { + super(new CompareImplementor(kind), NullPolicy.ANY); + this.kind = kind; + this.reverse = reverse; + } + + public static CompareIpFunction greater(Supplier reverse) { + return new CompareIpFunction(SqlKind.GREATER_THAN, reverse); + } +... +``` + +Will this be more clear? If so, I'll update the implementation. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +No. maybe just hide the lookupOperator(), change the interface of `withReverse` to +``` +public static final SqlOperator EQUALS_IP = + CompareIpFunction.equals().withReverse("EQUALS_IP").toUDF("EQUALS_IP"); +``` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +This will be a little tricky. `lookupOperator` relies on `PPLBuiltinOperators`. Hiding `lookupOperator` means moving operator lookup into `CompareIpFunction`. This will introduce cross dependency (`PPLBuiltinOperator -> CompareIpFunction` and `CompareIpFunction -> PPLBuiltinOperator`). + +Besides, it will reduce coupling to have the `lookupOperator` staying within where the operators are defined. `CompareIPFunction` does not need to know anything about other operators or how will it be used. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +> When a RelBuilder normalizes the condition of a filter, it creates a hash code for the comparison operator. During this process, it will call reverse() of a SqlOperator to retrieve its reverse if it finds the comparison has to be normalized to a certain order. + +1. What's benefit of overriding this API? 
Or what's the main disadvantage of viewing `a < b` and `b > a` as 2 different expressions? + +2. As for implementation, will it be better to add the reverse relationship unified in UserDefinedFunctionBuilder? Furthermore, if one operator has its reverse operator, we could invoke `toBinaryUDF` which has implementation of `reverse` based on functions' name. I think it's also fine not to add such API. Please refer to `SqlBinaryOperator` in Calcite, which implements their reverse based on SqlKind. By that way, we don't have to specify each functions' reverse here. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +1. It is a required override since Calcite will invoke `reverse()` and perform subsequent operations since it assumes any `SqlOperator` with `SqlKind` like `LESS_THAN` must have a reverse operator. + +2. I get it now. Yes, it'd doable. That will be neater and hide confusing details. I'll update the implementation. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +@qianheng-aws Refactored as suggested. Please review again. Thanks! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3957: Add missing udfs in v3 + +**URL:** https://github.com/opensearch-project/sql/pull/3957 + +**Author:** @ishaoxy + +**Created:** 2025-07-31T09:20:52Z + +**State:** MERGED + +**Merged:** 2025-08-08T08:16:55Z + +**Changes:** +438 -9 (7 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Added some missing functions in v2 and v3. + +- add +- subtract +- multiply +- divide +- cosh +- sinh +- rint +- signum +- expm1 +- modulus + +Added related IT and doc. + +### Related Issues +Resolves #3898 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - APPROVED + + +Looks good to me + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/CoshFunction.java:None` + + +These UDF classes are quite similar. Maybe we can create a utility method to directly convert a Math function to UDF, like [adaptExprMethodToUDF](https://github.com/opensearch-project/sql/blob/edb3a0d7d074d753da43360f0c08314924f7fedf/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java#L195). + +It will simplify the operator creation to something like the following: + +```java +public static final SqlOperator COSH = + adaptMathFunctionToUDF( + Math.class, + "cosh", + PPLReturnTypes.DOUBLE_FORCE_NULLABLE, + NullPolicy.ARG0, + PPLOperandTypes.NUMERIC) + .toUDF("COSH"); +``` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +Since all math functions works with double or float, you can first convert the input, including `Double` and `BigDecimal` types, to `double`, then invoking `Math.xxx`. In this way, you can get rid of e.g. `integralCosh` and `floatingCosh`, replacing them with `Math.cosh`. + + +The following code snippets may help: + +```java +operand = Expressions.convert_(operand, Number.class); +operand = Expressions.call(operand, "doubleValue"); +return Expressions.call(Math.class, methodName, operand); +``` + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/math/CoshFunction.java:None` + + +fixed. + + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:None` + + +fixed. 
+ + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:714` + + +The return type of `SqlStdOperatorTable.SIGN` (`ARG0`) seems to be different with `SIGNUM` (`INTEGER`) described in the documentation. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +Could be replaced with `org.apache.calcite.sql.type.ReturnTypes.DOUBLE_FORCE_NULLABLE` + + +### @yuancu on `docs/user/ppl/functions/math.rst:None` + + +The section markup should be of the same length with its title. Otherwise, it may not be properly rendered. + + +### @yuancu on `docs/user/ppl/functions/math.rst:None` + + +calculate**s*** + + +### @yuancu on `docs/user/ppl/functions/math.rst:None` + + +The argument type is usually written in the form of ARG0TYPE(s), ARG1TYPE(s),.... + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:267` + + +I'm a little unsure about whether should we explicitly add these functions or keep them as is. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +Thanks for reminding. Edited. + + +### @ishaoxy on `docs/user/ppl/functions/math.rst:None` + + +Fixed. + + +### @ishaoxy on `ppl/src/main/antlr/OpenSearchPPLLexer.g4:267` + + +I added them with reference to this issue https://github.com/opensearch-project/sql/issues/1339. If you think they are inappropriate, I can delete these functions. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:714` + + +Now the behavior is exactly the same as the existing function `sign`. Let me keep these changes first. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:237` + + +Can you add a Javadoc for this class? + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:237` + + +Of course. Added. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3956: [Backport 2.19-dev] Update commons-lang exclude rule to exclude it everywhere + +**URL:** https://github.com/opensearch-project/sql/pull/3956 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-31T04:23:58Z + +**State:** MERGED + +**Merged:** 2025-07-31T06:45:13Z + +**Changes:** +5 -0 (1 files) + + +## Description + +Backport edb3a0d7d074d753da43360f0c08314924f7fedf from #3932. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3955: [Backport 2.19-dev] Byte number should treated as Long in doc values (#3928) + +**URL:** https://github.com/opensearch-project/sql/pull/3955 + +**Author:** @LantaoJin + +**Created:** 2025-07-31T04:21:02Z + +**State:** MERGED + +**Merged:** 2025-07-31T18:11:19Z + +**Changes:** +53 -4 (8 files) + + +## Description + +Backport #3928 to 2.19-dev + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3954: [Backport 2.19-dev] Convert like function call to wildcard query for Calcite filter pushdown (#3915) + +**URL:** https://github.com/opensearch-project/sql/pull/3954 + +**Author:** @songkant-aws + +**Created:** 2025-07-31T03:37:23Z + +**State:** MERGED + +**Merged:** 2025-07-31T17:47:57Z + +**Changes:** +265 -81 (21 files) + + +## Description + +Convert like function call to wildcard query for Calcite filter pushdown (#3915) + +* Convert like function call to wildcard query for Calcite filter pushdown + + + +* Fix V2 expression like function bug and match its behavior in Calcite + + + +* Fix like default escape in Calcite + + + +* Fix tests + + + +* Fix spotless check + + + +* Address comments + + + +* Fix SQL IT correctness + + + +* Remove 
test log + + + +* Minor improve one CalciteLikeQueryIT + + + +--------- + + +(cherry picked from commit cd389833420aee5af31b850a6bbf6a309e861024) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3953: [Backport 2.19-dev] Fix flaky tests in RestHandlerClientYamlTestSuiteIT (#3901) + +**URL:** https://github.com/opensearch-project/sql/pull/3953 + +**Author:** @LantaoJin + +**Created:** 2025-07-31T03:30:14Z + +**State:** MERGED + +**Merged:** 2025-07-31T18:10:50Z + +**Changes:** +9 -7 (3 files) + + +## Description + +(cherry picked from commit 3c9766a04101be048611fd401730bd1a5aa45fe1 from #3901) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +The CI failure should be resolved by https://github.com/opensearch-project/sql/pull/3956 + + +--- + +# PR #3952: Disable a failed PPL query fallback to v2 by default + +**URL:** https://github.com/opensearch-project/sql/pull/3952 + +**Author:** @LantaoJin + +**Created:** 2025-07-31T03:15:31Z + +**State:** MERGED + +**Merged:** 2025-08-05T02:30:21Z + +**Changes:** +14 -125 (103 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev`, `v3.2.0` + + +## Description + +### Description +Disable a failed PPL query fallback 
to v2 by default: plugins.calcite.fallback.allowed=false + +### Related Issues +Resolves #3942 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +backport 2.19-dev? + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java:82` + + +#3921 seems didn't handle the calcite fallback disable case. I got failures following if disable calcite fallback. +``` +PPLPermissionsIT > testUserWithoutMappingPermissionCannotGetFieldMappings() FAILED + java.lang.AssertionError: expected:<403> but was:<500> + at org.junit.Assert.fail(Assert.java:89) + at org.junit.Assert.failNotEquals(Assert.java:835) + at org.junit.Assert.assertEquals(Assert.java:647) + at org.junit.Assert.assertEquals(Assert.java:633) + at org.opensearch.sql.security.PPLPermissionsIT.testUserWithoutMappingPermissionCannotGetFieldMappings(PPLPermissionsIT.java:568) +org.opensearch.sql.security.PPLPermissionsIT.testUserWithoutSearchPermissionCannotSearchIndex(): FAILURE 0.336s + +PPLPermissionsIT > testUserWithoutSearchPermissionCannotSearchIndex() FAILED + java.lang.AssertionError: expected:<403> but was:<500> + at org.junit.Assert.fail(Assert.java:89) + at org.junit.Assert.failNotEquals(Assert.java:835) + at org.junit.Assert.assertEquals(Assert.java:647) + at org.junit.Assert.assertEquals(Assert.java:633) + at org.opensearch.sql.security.PPLPermissionsIT.testUserWithoutSearchPermissionCannotSearchIndex(PPLPermissionsIT.java:550) +``` +To unblock this PR before code freeze. I allowed fallback in PPLPermissionsIT.java. 
cc @penghuo @vamsimanohar + + +### @vamsimanohar on `integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java:82` + + +I am ok for now, lets fix this after the code freeze. I haven't tested without fallback. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3948: [Backport 2.19-dev] Fix create PIT permissions issue + +**URL:** https://github.com/opensearch-project/sql/pull/3948 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-30T23:32:39Z + +**State:** MERGED + +**Merged:** 2025-08-04T16:39:14Z + +**Changes:** +797 -8 (6 files) + + +## Description + +Backport 739c4df42c67709369af968351e0bdeb11cab660 from #3921. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3943: Add missing command in index.rst + +**URL:** https://github.com/opensearch-project/sql/pull/3943 + +**Author:** @penghuo + +**Created:** 2025-07-30T16:45:48Z + +**State:** MERGED + +**Merged:** 2025-07-31T18:10:17Z + +**Changes:** +34 -34 (3 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev`, `v3.2.0` + +**Assignees:** @penghuo + + +## Description + +### Description +1. Add missing command in index.rst. +2. Clean up (Experimental)(From 3.0.0) from index.rst, since each command doc already have version info. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/limitations/limitations.rst:105` + + +Calcite support relevant functions. https://github.com/opensearch-project/sql/pull/3834 + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3940: Append limit operator for QUERY_SIZE_LIMIT + +**URL:** https://github.com/opensearch-project/sql/pull/3940 + +**Author:** @qianheng-aws + +**Created:** 2025-07-30T08:32:52Z + +**State:** MERGED + +**Merged:** 2025-07-31T18:36:46Z + +**Changes:** +333 -239 (105 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Since we don't have plan to implement [eval push down](https://github.com/opensearch-project/sql/issues/3387), we still need to append LIMIT operator explicitly for QUERY_SIZE_LIMIT, in order to cover the case of `Project - Scan`. See the source comment: https://github.com/opensearch-project/sql/pull/3880#discussion_r2209634187 + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3879 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +can you add a case in `ExplainIT`? + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +It won't change the output plan of explain. 
+ + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +Add an IT to verify querySizeLimit pushdown. +Q, should we revert code of https://github.com/opensearch-project/sql/pull/3880? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +We can reuse [LogicalSystemLimit](https://github.com/opensearch-project/sql/pull/3749) in explain. The physical plan is no diff. Then we can verify the querySizeLimit pushdown in explain IT. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:None` + + +- Discussed with @LantaoJin offline, I will add a new `LogicalSystemLimit` operator to represent the limit operator added by our system. It will be used in many other places as well where we need to add a limitation. So it will contain a field of `Type` to distinguish them. + +- I will manually revert few lines related, although they are compatible. That https://github.com/opensearch-project/sql/pull/3880 also contains a bug fix related to limit push down. + + +### @penghuo on `integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json:3` + + +we should supportLimit pushdown through aggregation. 
track with https://github.com/opensearch-project/sql/issues/3961 + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3939: [Backport 2.19-dev] Support casting date literal to timestamp (#3831) + +**URL:** https://github.com/opensearch-project/sql/pull/3939 + +**Author:** @yuancu + +**Created:** 2025-07-30T06:09:08Z + +**State:** MERGED + +**Merged:** 2025-07-31T03:21:27Z + +**Changes:** +493 -388 (22 files) + + +## Description + +## Description + +Backport #3831 to 2.19-dev + +## Commit message + +* Support following datetime casts in v2: date str -> timestamp +timestamp str -> date +timestamp str -> time + + + +* Invoke datetime UDF to cast date/time/timestamp with Calcite + + + +* Test cast to date/time/timestamp + + + +* Use stricter date and time formatters + + + +* Add a timestamp formatter for instantiating ExprTimestampValue + + + +* Simplify date/time expr constructors & parsers + + + +* Change to ExpressionEvaluationException when fail parsing malformat date/time strings + + + +--------- + + +(cherry picked from commit 30aba6588291f4d3f88ae21f98788608da261667) + + +### Related Issues +#3728 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3936: Publish datasources module for downstream reuse in unified-ppl-2.19-dev + +**URL:** https://github.com/opensearch-project/sql/pull/3936 + +**Author:** @Mebsina + +**Created:** 2025-07-29T22:57:47Z + +**State:** MERGED + +**Merged:** 2025-09-09T17:58:36Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `stalled` + + +## Description + +### Description +Add `datasources` module + +**Local Publishing Test** +Example after running the local publish command: + +``` +./gradlew publishUnifiedQueryPublicationToMavenLocal + +tree -d ~/.m2/repository/org/opensearch +└── query + ├── unified-query-api + │   └── 2.19.0.0-SNAPSHOT + ├── unified-query-common + │   └── 2.19.0.0-SNAPSHOT + ├── unified-query-core + │   └── 2.19.0.0-SNAPSHOT + ├── unified-query-datasources <- adding datasources + │   └── 2.19.0.0-SNAPSHOT + ├── unified-query-opensearch + │   └── 2.19.0.0-SNAPSHOT + ├── unified-query-ppl + │   └── 2.19.0.0-SNAPSHOT + ├── unified-query-protocol + │   └── 2.19.0.0-SNAPSHOT + └── unified-query-sql + └── 2.19.0.0-SNAPSHOT + +``` +### Related Issues +[3763](https://github.com/opensearch-project/sql/pull/3763) + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3935: Publish datasources module for downstream reuse in unified-ppl + +**URL:** https://github.com/opensearch-project/sql/pull/3935 + +**Author:** @Mebsina + +**Created:** 2025-07-29T22:49:18Z + +**State:** MERGED + +**Merged:** 2025-08-01T22:52:56Z + +**Changes:** +1 -1 (1 files) + + +## Description + +### Description +Add `datasources` module + +**Local Publishing Test** +Example after running the local publish command: + +``` +./gradlew publishUnifiedQueryPublicationToMavenLocal + +tree -d ~/.m2/repository/org/opensearch +└── query + ├── unified-query-api + │   └── 3.1.0.0-SNAPSHOT + ├── unified-query-common + │   └── 3.1.0.0-SNAPSHOT + ├── unified-query-core + │   └── 3.1.0.0-SNAPSHOT + ├── unified-query-datasources <- adding datasources + │   └── 3.1.0.0-SNAPSHOT + ├── unified-query-opensearch + │   └── 3.1.0.0-SNAPSHOT + ├── unified-query-ppl + │   └── 3.1.0.0-SNAPSHOT + ├── unified-query-protocol + │   └── 3.1.0.0-SNAPSHOT + └── unified-query-sql + └── 3.1.0.0-SNAPSHOT + +``` + +### Related Issues +[3763](https://github.com/opensearch-project/sql/pull/3763) + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3934: Enhance sort command in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/3934 + +**Author:** @ritvibhatt + +**Created:** 2025-07-29T16:53:51Z + +**State:** MERGED + +**Merged:** 2025-08-19T16:17:14Z + +**Changes:** +495 -34 (33 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev` + + +## Description + +### Description +Enhances sort command in PPL with result limiting, field type specification, and sort direction reversal + +### New syntax: +`sort [count] [+|-] sort-field, [+|-] sort-field, ... [desc|d] +` +- Count parameter limits the max number of results returned +- Type specifications: can specify field type of str(), num(), ip(), auto() default is auto +- desc/d reverses the results, if multiple fields are specified, reverses order of first field then for all duplicate values of the first field, reverses the order of the values of the second field and so on. 
+ +Examples: + +``` +# Limit results to top 5 oldest employees +source=employees | sort 5 -age +``` + +``` +# Sort logs by source IP address treated as IP type, then by response_time treated as +numeric +source=logs | sort ip(source_ip), +num(response_time) +``` + +``` +# Sort employees by department and age, with both fields in descending order +source=employees | sort department, age desc +``` + +Performance testing: +Baseline: +```source=big5 | sort @timestamp | head ``` +head 10: Avg = 44ms, P90 = 47ms +head 100: Avg = 65ms, P90 = 73ms +head 1K: Avg = 221ms, P90 = 242ms +head 10K: Avg = 1868ms, P90 = 1885ms +head 100K: Avg = 1858ms, P90 = 1879ms + +```source=big5 | sort agent.type | head ``` +head 10: Avg = 26ms, P90 = 28ms +head 100: Avg = 32ms, P90 = 35ms +head 1K: Avg = 101ms, P90 = 102ms +head 10K: Avg = 805ms, P90 = 808ms +head 100K: Avg = 804ms, P90 = 809ms + +sort enhancements: +```source=big5 | sort @timestamp``` +head 10: Avg = 42ms, P90 = 46ms +head 100: Avg = 54ms, P90 = 58ms +head 1K: Avg = 213ms, P90 = 215ms +head 10K: Avg = 1856ms, P90 = 1879ms +head 100K: Avg = 1857ms, P90 = 1877ms + +```source=big5 | sort str(agent.type) | head ``` +head 10: Avg = 27ms, P90 = 35ms +head 100: Avg = 32ms, P90 = 33ms +head 1K: Avg = 101ms, P90 = 103ms +head 10K: Avg = 809ms, P90 = 827ms +head 100K: Avg = 811ms, P90 = 839ms + +```source=big5 | sort @timestamp desc | head ``` +head 10: Avg = 81ms, P90 = 83ms +head 100: Avg = 97ms, P90 = 98ms +head 1K: Avg = 270ms, P90 = 271ms +head 10K: Avg = 1952ms, P90 = 1974ms +head 100K: Avg = 1949ms, P90 = 1954ms + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] +#3931 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - COMMENTED + + +1. Add IT in CalcitePPLSortIT with calciteEnable() +2. Add ExplainIT to test sort with count is correctly pushed down +3. Add test in PPLQueryDataAnonymizerTest, refer to https://github.com/opensearch-project/sql/pull/3867 + + +### @ykmr1224 - DISMISSED + + +LGTM other than minor comment. + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/sort.rst:25` + + +desc means reverse operations on fields list? e.g. `sort -a, -b desc` equal to `sort a, b`. + + +### @penghuo on `docs/user/ppl/cmd/sort.rst:143` + + +Include a test case explaining the multiple-fields reverse case + + +### @penghuo on `core/src/main/java/org/opensearch/sql/ast/tree/Sort.java:32` + + +add javadoc + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:457` + + +Add a test for multiple fields, e.g. `sort a -b desc` + + +### @penghuo on `core/src/main/java/org/opensearch/sql/planner/logical/LogicalSort.java:24` + + +Add javadoc. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/analysis/WindowExpressionAnalyzer.java:None` + + +change null -> 0? + + +### @ritvibhatt on `docs/user/ppl/cmd/sort.rst:25` + + +Yes it reverses the field sort order, that's spl behavior as well + + +### @ritvibhatt on `docs/user/ppl/cmd/sort.rst:143` + + +Added as Example 7 + + +### @ritvibhatt on `ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java:457` + + +Added `testSortCommandWithMultipleFieldsAndDesc` for multiple fields + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/analysis/WindowExpressionAnalyzer.java:None` + + +Yes that makes sense, changed to 0 + + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Sort.java:None` + + +Let's don't put verbose comment. 
+ + +### @ykmr1224 on `core/src/main/java/org/opensearch/sql/ast/tree/Sort.java:None` + + +Same as above. + + +### @LantaoJin on `docs/user/ppl/cmd/sort.rst:25` + + +can you add a use case with `desc` in the doc? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:None` + + +can you remove the useless `"` and add `desc` to explainIT +``` +"source=opensearch-sql_test_index_account | sort 5 age desc | fields age" +``` + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java:166` + + +please add some tests with `desc` keyword, for example +``` +source=%s | sort 3 - account_number, age desc | fields account_number, age +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java:None` + + +Seems no limit if count < 0? maybe we need to explain the case in user doc. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:None` + + +And we both need the explain tests for +``` +``auto(field)``, ``str(field)``, ``ip(field)``, or ``num(field)`` +``` + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:142` + + +can you add case with `desc` + + +### @Swiddis on `core/src/main/java/org/opensearch/sql/planner/logical/LogicalSort.java:24` + + +I would prefer if we made this an `Optional` instead of having the nullability be implicit + +This makes the contract obvious to anyone using/reading the class. We can also improve the validation at this constructor, like requiring Integer be non-null and non-negative if supplied (allowing us to raise nice validation errors to users, "sort limit cannot be negative."). We can also map 0 to `None` here to simplify downstream logic. 
+ + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java:None` + + +Not applying limits for negative numbers is consistent with some other DBs, like SQLite. Lots of other SQL flavors return errors though. If we want to handle negatives I think we should raise an error at the construction of the Node. + + +### @Swiddis on `ppl/src/main/antlr/OpenSearchPPLParser.g4:141` + + +thought (non-blocking): If we create a dedicated `nonNegativeIntegerLiteral` or `positiveIntegerLiteral`, how would the error look to users if they supply a negative? It might be safer and generally applicable? + + +### @ritvibhatt on `docs/user/ppl/cmd/sort.rst:25` + + +Examples 6 and 7 use ```desc``` + + +### @ritvibhatt on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:None` + + +Added explain tests for both + + +### @ritvibhatt on `integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java:166` + + +Added another test with ```desc``` + + +### @ritvibhatt on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java:None` + + +Yes, SPL also has no limit if count < 0, added an explanation of that in the user doc + + +### @ritvibhatt on `ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java:142` + + +Added + + +### @ritvibhatt on `ppl/src/main/antlr/OpenSearchPPLParser.g4:141` + + +I can look into trying that out that in another PR + + +### @ritvibhatt on `core/src/main/java/org/opensearch/sql/planner/logical/LogicalSort.java:24` + + +Updated to make the return type of the getter for ```count``` ```Optional``` + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:177` + + +Is it releated to https://github.com/opensearch-project/sql/issues/3990. +if not, create an issue to track sort script pushdown. 
+ + +### @penghuo on `core/src/main/java/org/opensearch/sql/analysis/WindowExpressionAnalyzer.java:None` + + +nit: One way to avoid unnecessary change is to add another constructor with default value count = 0. + + +## General Comments + + +### @penghuo + + +@Swiddis @LantaoJin Please take another look. + + +--- + +# PR #3933: [Backport 2.19-dev] Support casting to IP with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3933 + +**Author:** @yuancu + +**Created:** 2025-07-29T08:42:51Z + +**State:** MERGED + +**Merged:** 2025-07-30T05:38:49Z + +**Changes:** +723 -458 (15 files) + + +## Description + +## Description + +Backport #3919 to 2.19-dev + +## Commit message + +* Support casting ip + + + +* Refactor type checking for UDT (specifically, IP) + + + +* Remove IPCastFunctionTest + + + +* Remove unused IPUtil function + + + +* Fix error message of comparing with different types + + + +* Correct casting ip to string & update conversion doc + + + +* Change string representation from IP [ip] to [ip] to accommodate casting ip to string + + + +--------- + + +(cherry picked from commit 411395dd595359acdd94a54f0544aa20a6f9833d) + +### Related Issues +#3918 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3932: Update commons-lang exclude rule to exclude it everywhere + +**URL:** https://github.com/opensearch-project/sql/pull/3932 + +**Author:** @Swiddis + +**Created:** 2025-07-28T21:05:55Z + +**State:** MERGED + +**Merged:** 2025-07-31T04:23:00Z + +**Changes:** +5 -0 (1 files) + +**Labels:** `maintenance`, `backport 2.19-dev` + + +## Description + +### Description +`commons-lang` was excluded as part of the `calcite-core` import for the `core` config, but still was being pulled in via another transitive dependency chain in `ppl`. This PR updates the exclude to remove `commons-lang` everywhere. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3930: Skip script encoding when run explain with 'extended' + +**URL:** https://github.com/opensearch-project/sql/pull/3930 + +**Author:** @LantaoJin + +**Created:** 2025-07-28T09:09:33Z + +**State:** MERGED + +**Merged:** 2025-08-13T07:48:35Z + +**Changes:** +39 -2 (7 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +skip script encoding when run explain with 'extended': +- /_plugins/_ppl/_explain?format=extended +- `explain extended source=t | ...` + +The RexNode expression `SCRIPT->=(-($1, 2), 30)` in explain command: +``` +{ + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [{ + "op": { + "name": "-", + "kind": "MINUS", + "syntax": "BINARY" + }, + "operands": [{ + "input": 2, + "name": "$2" + }, { + "literal": 2, + "type": { + "type": "INTEGER", + "nullable": false + } + }], + "type": { + "type": "BIGINT", + "nullable": true + } + }, { + "literal": 30, + "type": { + "type": "INTEGER", + "nullable": false + } + }] +} +``` +### Related Issues +Resolves #3929 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +So this is only for extended mode? If we want to troubleshoot a query, we can still get executable DSL by explain right? 
+ + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:None` + + +Why not put this line inner try-catch? I'm worrying if we might add more code between this line and try-catch in the future, it will cause exception and lead to skipping `isExplain.remove()`, which will then pollute the thread pool + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:None` + + +done + + +### @penghuo on `plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java:44` + + +if it is only for testing? move to test class + + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java:39` + + +Not sure if possible to avoid this global variable. If this is necessary, could you add clear javadoc for its purpose? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java:39` + + +Threadlocal variable should be static to avoid binding to one context instance (all context instances should share the same thread local variable). I will add javadoc in other PRs. + + +## General Comments + + +### @qianheng-aws + + +The main concern from me is that, after this change, the DSL in explain output is no longer able to be executed. I think maybe we should skip decoding the script for execution as well. + + +### @LantaoJin + + +> The main concern from me is that, after this change, the DSL in explain output is no longer able to be executed. I think maybe we should skip decoding the script for execution as well. + +The entire json contains unreadable objects (Java Map of schema), the current the explain output in my PR only display the EXPR part. So maybe we should keep the decode string in explain and add a parameter to skip decode process in explain. + + +### @LantaoJin + + +Refactor the code: skip script encoding when run the explain command in 'extended' mode. 
cc @qianheng-aws + + +### @LantaoJin + + +Ping @qianheng-aws @penghuo + + +### @qianheng-aws + + +Shall we add description in `explain.rst` to clarify the limitation of `explain extended` that its output DSL is not executable? + + +### @LantaoJin + + +> Shall we add description in `explain.rst` to clarify the limitation of `explain extended` that its output DSL is not executable? + +Seems we never mentioned the script in explain output can be executed. + + +--- + +# PR #3928: Byte number should treated as Long in doc values + +**URL:** https://github.com/opensearch-project/sql/pull/3928 + +**Author:** @LantaoJin + +**Created:** 2025-07-28T07:03:08Z + +**State:** MERGED + +**Merged:** 2025-07-30T05:46:10Z + +**Changes:** +53 -5 (8 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +` | eval a = abs(byte_number) | stats count() by a ` failed with +``` +Caused by: java.lang.ClassCastException: class java.lang.Long cannot be cast to class java.lang.Byte (java.lang.Long and java.lang.Byte are in module java.base of loader 'bootstrap') + at Reducer.apply(Unknown Source) ~[?:?] + at Reducer.apply(Unknown Source) ~[?:?] + at org.opensearch.sql.opensearch.storage.script.core.CalciteScript.lambda$execute$0(CalciteScript.java:45) ~[?:?] + at java.base/java.security.AccessController.doPrivileged(AccessController.java:319) ~[?:?] + at org.opensearch.sql.opensearch.storage.script.core.CalciteScript.execute(CalciteScript.java:43) ~[?:?] + at org.opensearch.sql.opensearch.storage.script.aggregation.CalciteAggregationScript.execute(CalciteAggregationScript.java:50) ~[?:?] +``` +### Related Issues +Resolves #3927 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:210` + + +this is the code change to fix the bug. The rest is defensive programming, which is not related to this bug. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3926: [Backport 2.19-dev] Support aggregation push down with scripts (#3916) + +**URL:** https://github.com/opensearch-project/sql/pull/3926 + +**Author:** @qianheng-aws + +**Created:** 2025-07-28T06:23:28Z + +**State:** MERGED + +**Merged:** 2025-07-29T16:17:39Z + +**Changes:** +652 -304 (27 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3921: Fix create PIT permissions issue + +**URL:** https://github.com/opensearch-project/sql/pull/3921 + +**Author:** @vamsimanohar + +**Created:** 2025-07-25T00:15:26Z + +**State:** MERGED + +**Merged:** 2025-07-30T23:32:26Z + +**Changes:** +805 -8 (6 files) + +**Labels:** `bug`, `backport 2.19-dev` + +**Assignees:** @vamsimanohar + + +## Description + +### Description +Currently there is a bug where PPL is asking for search permission across all indices even though the user has permissions to make a PPL query on a particular index. + +Issue is more detailed here: https://github.com/opensearch-project/security/issues/5508 + +Also fixed a few other issues with wrong error status codes in case of authorization errors. + + +Added more PPL permissions ITs, which cover the above bug. + +### Root cause +* Transport PIT search calls require index information for correct security authorization. +* REST PIT search calls automatically handle this in the REST layer by decoding PIT information into indices before transport layer. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +It seems like the reason we weren't catching these security exceptions already is because we're relying heavily on catch-rethrow as `RuntimeException`. 
We should revisit the error strategy at the top level to see if we can start being more specific with our error handling overall; I'd like to see a future where we have a plugin-specific error code index (like how many shell tools define all their exit codes). + + +## Review Comments + + +### @ps48 on `opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java:119` + + +Nit: maybe we can link the original security design here: https://github.com/opensearch-project/security/issues/2087 + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3920: [Backport 2.19-dev] Default to UTC for date/time functions across PPL and SQL + +**URL:** https://github.com/opensearch-project/sql/pull/3920 + +**Author:** @yuancu + +**Created:** 2025-07-24T10:57:34Z + +**State:** MERGED + +**Merged:** 2025-07-25T20:33:18Z + +**Changes:** +161 -96 (18 files) + + +## Description + +### Description +Backport #3854 to 2.19-dev + +### Related Issues +#3725 + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3919: Support casting to IP with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3919 + +**Author:** @yuancu + +**Created:** 2025-07-24T08:33:52Z + +**State:** MERGED + +**Merged:** 2025-07-28T07:24:01Z + +**Changes:** +688 -485 (15 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Support casting to IP: + +It supports: +- IP -> IP +- IPv4 string -> IP +- IPv6 string -> IP + +**Implementation** + +I created a IP_CAST operator to handle IP casting. It is called in `ExtendedRexBuilder.makeCast`. Besides, I refactored type checkers relating to IP type checking, so that we don't have to create a new type checker class for each function that uses an UDT that can not be mapped to Calcite's data types. + +### Related Issues +Resolves #3918 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +Can you update the https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/conversion.rst doc, and the IP datatype is missing in https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/datatypes.rst + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @yuancu + + +I saw IP in the [PPL Data Types](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/datatypes.rst#ppl-data-types) section. 
Do you mean we need a specific section like [Date and Time Data Types](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/datatypes.rst#date-and-time-data-types) to describe its behaviors? + + +### @LantaoJin + + +> I saw IP in the [PPL Data Types](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/datatypes.rst#ppl-data-types) section. Do you mean we need a specific section like [Date and Time Data Types](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/datatypes.rst#date-and-time-data-types) to describe its behaviors? + +Yes. we miss a IP type description in datatypes.rst. please add one. But I think it could be a separated PR. + + +--- + +# PR #3916: Support aggregation push down with scripts + +**URL:** https://github.com/opensearch-project/sql/pull/3916 + +**Author:** @qianheng-aws + +**Created:** 2025-07-23T10:32:29Z + +**State:** MERGED + +**Merged:** 2025-07-26T03:39:06Z + +**Changes:** +647 -314 (27 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +1. Support aggregation push down with scripts +2. Support push down for some new added UDAF +3. Fix sort push down after aggregation by push the sort into agg bucket. + +### Related Issues +Resolves +https://github.com/opensearch-project/sql/issues/3386 +https://github.com/opensearch-project/sql/issues/3385 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java:None` + + +q33 -> q30 + + +## General Comments + + +### @qianheng-aws + + +CI failed because of the sort push down after aggregation is implemented improperly. Per v2's implementation, we should push down sort's collation into the inner of AggregationBuilder instead of the outer + +For instance, when do push down for PPL +``` +source = big5 +| eval range_bucket = case( + `metrics.size` < -10, 'range_1', + `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', + `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', + `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', + `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', + `metrics.size` >= 2000, 'range_6') +| stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax by range_bucket, span(`@timestamp`, 1h) as auto_span +| sort + range_bucket, + auto_span +``` +In Calcite, it will build SearchRequest like below: +``` +{ + "from": 0, + "size": 0, + "timeout": "1m", + "sort": [ + { + "range_bucket": { + "order": "asc", + "missing": "_first" + } + }, + { + "auto_span": { + "order": "asc", + "missing": "_first" + } + } + ], + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "range_bucket": { + "terms": { + "script": { + "source": "{\"langType\":\"calcite\" …" + } + } + } + ... + ... + } + ] + } + } + } +} +``` +And then cause exception: +``` +QueryShardException[No mapping found for [range_bucket] in order to sort on]\ +``` + +@yuancu + + +### @qianheng-aws + + +> CI failed because of the sort push down after aggregation is implemented improperly. Per v2's implementation, we should push down sort's collation inner AggregationBuilder instead of outer. + +Fixed by the lasted commit. 
The correct DSL should be: +``` +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 10, + "sources": [ + { + "range_bucket": { + "terms": { + "script": { + "source": "{\"langType\":\"calcite\",\"script\":\"......\"}", + "lang": "opensearch_compounded_script", + "params": { + "utcTimestamp": 1753349341218939000 + } + }, + "missing_bucket": true, + "missing_order": "first", + "order": "asc" + } + } + }, + { + "auto_span": { + "date_histogram": { + "field": "@timestamp", + "missing_bucket": true, + "missing_order": "first", + "order": "asc", + "fixed_interval": "1h" + } + } + } + ] + }, + ... + ... + } + } +} +``` + + +### @qianheng-aws + + +Ignore q30 because of too many scripts push down, which will then trigger ResourceMonitor restriction. + +This query will produce 89 scripts if push down enabled. +``` +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), ... SUM(ResolutionWidth + 89) FROM hits; +``` + +It should be addressed in a separate PR by merging multi-scripts or preventing such script push down. + +As said in this comments https://github.com/opensearch-project/sql/pull/3859#issuecomment-3060294305, filter script push down may trigger similar issue if filter conditions produce too many scripts + + +### @penghuo + + +> Ignore q30 because of too many scripts push down, which will then trigger ResourceMonitor restriction. + +Could u explain more about restriction? 
+ +I try a more complex one, it works +``` +### +POST {{baseUrl}}/demo-index/_bulk +Content-Type: application/x-ndjson + +{ "index": {} } +{ "region": "us-east", "sales": 100 } +{ "index": {} } +{ "region": "us-east", "sales": 200 } +{ "index": {} } +{ "region": "us-west", "sales": 300 } +{ "index": {} } +{ "region": "us-west", "sales": 400 } + +### +POST {{baseUrl}}/_plugins/_ppl/_explain +Content-Type: application/x-ndjson + +{ + "query": "source=demo-index | eval sales1=sales+1, sales2=sales+2, sales3=sales+3, sales4=sales+4, sales5=sales+5, sales6=sales+6, sales7=sales+7, sales8=sales+8, sales9=sales+9, sales10=sales+10, sales11=sales+11, sales12=sales+12, sales13=sales+13, sales14=sales+14, sales15=sales+15, sales16=sales+16, sales17=sales+17, sales18=sales+18, sales19=sales+19, sales20=sales+20, sales21=sales+21, sales22=sales+22, sales23=sales+23, sales24=sales+24, sales25=sales+25, sales26=sales+26, sales27=sales+27, sales28=sales+28, sales29=sales+29, sales30=sales+30, sales31=sales+31, sales32=sales+32, sales33=sales+33, sales34=sales+34, sales35=sales+35, sales36=sales+36, sales37=sales+37, sales38=sales+38, sales39=sales+39, sales40=sales+40, sales41=sales+41, sales42=sales+42, sales43=sales+43, sales44=sales+44, sales45=sales+45, sales46=sales+46, sales47=sales+47, sales48=sales+48, sales49=sales+49, sales50=sales+50, sales51=sales+51, sales52=sales+52, sales53=sales+53, sales54=sales+54, sales55=sales+55, sales56=sales+56, sales57=sales+57, sales58=sales+58, sales59=sales+59, sales60=sales+60, sales61=sales+61, sales62=sales+62, sales63=sales+63, sales64=sales+64, sales65=sales+65, sales66=sales+66, sales67=sales+67, sales68=sales+68, sales69=sales+69, sales70=sales+70, sales71=sales+71, sales72=sales+72, sales73=sales+73, sales74=sales+74, sales75=sales+75, sales76=sales+76, sales77=sales+77, sales78=sales+78, sales79=sales+79, sales80=sales+80, sales81=sales+81, sales82=sales+82, sales83=sales+83, sales84=sales+84, sales85=sales+85, 
sales86=sales+86, sales87=sales+87, sales88=sales+88, sales89=sales+89, sales90=sales+90, sales91=sales+91, sales92=sales+92, sales93=sales+93, sales94=sales+94, sales95=sales+95, sales96=sales+96, sales97=sales+97, sales98=sales+98, sales99=sales+99, sales100=sales+100, sales101=sales+101, sales102=sales+102, sales103=sales+103, sales104=sales+104, sales105=sales+105, sales106=sales+106, sales107=sales+107, sales108=sales+108, sales109=sales+109, sales110=sales+110, sales111=sales+111, sales112=sales+112, sales113=sales+113, sales114=sales+114, sales115=sales+115, sales116=sales+116, sales117=sales+117, sales118=sales+118, sales119=sales+119, sales120=sales+120, sales121=sales+121, sales122=sales+122, sales123=sales+123, sales124=sales+124, sales125=sales+125, sales126=sales+126, sales127=sales+127, sales128=sales+128, sales129=sales+129, sales130=sales+130, sales131=sales+131, sales132=sales+132, sales133=sales+133, sales134=sales+134, sales135=sales+135, sales136=sales+136, sales137=sales+137, sales138=sales+138, sales139=sales+139, sales140=sales+140, sales141=sales+141, sales142=sales+142, sales143=sales+143, sales144=sales+144, sales145=sales+145, sales146=sales+146, sales147=sales+147, sales148=sales+148, sales149=sales+149, sales150=sales+150, sales151=sales+151, sales152=sales+152, sales153=sales+153, sales154=sales+154, sales155=sales+155, sales156=sales+156, sales157=sales+157, sales158=sales+158, sales159=sales+159, sales160=sales+160, sales161=sales+161, sales162=sales+162, sales163=sales+163, sales164=sales+164, sales165=sales+165, sales166=sales+166, sales167=sales+167, sales168=sales+168, sales169=sales+169, sales170=sales+170, sales171=sales+171, sales172=sales+172, sales173=sales+173, sales174=sales+174, sales175=sales+175, sales176=sales+176, sales177=sales+177, sales178=sales+178, sales179=sales+179, sales180=sales+180, sales181=sales+181, sales182=sales+182, sales183=sales+183, sales184=sales+184, sales185=sales+185, sales186=sales+186, 
sales187=sales+187, sales188=sales+188, sales189=sales+189, sales190=sales+190, sales191=sales+191, sales192=sales+192, sales193=sales+193, sales194=sales+194, sales195=sales+195, sales196=sales+196, sales197=sales+197, sales198=sales+198, sales199=sales+199, sales200=sales+200, sales201=sales+201, sales202=sales+202, sales203=sales+203, sales204=sales+204, sales205=sales+205, sales206=sales+206, sales207=sales+207, sales208=sales+208, sales209=sales+209, sales210=sales+210, sales211=sales+211, sales212=sales+212, sales213=sales+213, sales214=sales+214, sales215=sales+215, sales216=sales+216, sales217=sales+217, sales218=sales+218, sales219=sales+219, sales220=sales+220, sales221=sales+221, sales222=sales+222, sales223=sales+223, sales224=sales+224, sales225=sales+225, sales226=sales+226, sales227=sales+227, sales228=sales+228, sales229=sales+229, sales230=sales+230, sales231=sales+231, sales232=sales+232, sales233=sales+233, sales234=sales+234, sales235=sales+235, sales236=sales+236, sales237=sales+237, sales238=sales+238, sales239=sales+239, sales240=sales+240, sales241=sales+241, sales242=sales+242, sales243=sales+243, sales244=sales+244, sales245=sales+245, sales246=sales+246, sales247=sales+247, sales248=sales+248, sales249=sales+249, sales250=sales+250, sales251=sales+251, sales252=sales+252, sales253=sales+253, sales254=sales+254, sales255=sales+255, sales256=sales+256, sales257=sales+257, sales258=sales+258, sales259=sales+259, sales260=sales+260, sales261=sales+261, sales262=sales+262, sales263=sales+263, sales264=sales+264, sales265=sales+265, sales266=sales+266, sales267=sales+267, sales268=sales+268, sales269=sales+269, sales270=sales+270, sales271=sales+271, sales272=sales+272, sales273=sales+273, sales274=sales+274, sales275=sales+275, sales276=sales+276, sales277=sales+277, sales278=sales+278, sales279=sales+279, sales280=sales+280, sales281=sales+281, sales282=sales+282, sales283=sales+283, sales284=sales+284, sales285=sales+285, sales286=sales+286, 
sales287=sales+287, sales288=sales+288, sales289=sales+289, sales290=sales+290, sales291=sales+291, sales292=sales+292, sales293=sales+293, sales294=sales+294, sales295=sales+295, sales296=sales+296, sales297=sales+297, sales298=sales+298, sales299=sales+299, sales300=sales+300, sales301=sales+301, sales302=sales+302, sales303=sales+303, sales304=sales+304, sales305=sales+305, sales306=sales+306, sales307=sales+307, sales308=sales+308, sales309=sales+309, sales310=sales+310, sales311=sales+311, sales312=sales+312, sales313=sales+313, sales314=sales+314, sales315=sales+315, sales316=sales+316, sales317=sales+317, sales318=sales+318, sales319=sales+319, sales320=sales+320, sales321=sales+321, sales322=sales+322, sales323=sales+323, sales324=sales+324, sales325=sales+325, sales326=sales+326, sales327=sales+327, sales328=sales+328, sales329=sales+329, sales330=sales+330, sales331=sales+331, sales332=sales+332, sales333=sales+333, sales334=sales+334, sales335=sales+335, sales336=sales+336, sales337=sales+337, sales338=sales+338, sales339=sales+339, sales340=sales+340, sales341=sales+341, sales342=sales+342, sales343=sales+343, sales344=sales+344, sales345=sales+345, sales346=sales+346, sales347=sales+347, sales348=sales+348, sales349=sales+349, sales350=sales+350, sales351=sales+351, sales352=sales+352, sales353=sales+353, sales354=sales+354, sales355=sales+355, sales356=sales+356, sales357=sales+357, sales358=sales+358, sales359=sales+359, sales360=sales+360, sales361=sales+361, sales362=sales+362, sales363=sales+363, sales364=sales+364, sales365=sales+365, sales366=sales+366, sales367=sales+367, sales368=sales+368, sales369=sales+369, sales370=sales+370, sales371=sales+371, sales372=sales+372, sales373=sales+373, sales374=sales+374, sales375=sales+375, sales376=sales+376, sales377=sales+377, sales378=sales+378, sales379=sales+379, sales380=sales+380, sales381=sales+381, sales382=sales+382, sales383=sales+383, sales384=sales+384, sales385=sales+385, sales386=sales+386, 
sales387=sales+387, sales388=sales+388, sales389=sales+389, sales390=sales+390, sales391=sales+391, sales392=sales+392, sales393=sales+393, sales394=sales+394, sales395=sales+395, sales396=sales+396, sales397=sales+397, sales398=sales+398, sales399=sales+399, sales400=sales+400, sales401=sales+401, sales402=sales+402, sales403=sales+403, sales404=sales+404, sales405=sales+405, sales406=sales+406, sales407=sales+407, sales408=sales+408, sales409=sales+409, sales410=sales+410, sales411=sales+411, sales412=sales+412, sales413=sales+413, sales414=sales+414, sales415=sales+415, sales416=sales+416, sales417=sales+417, sales418=sales+418, sales419=sales+419, sales420=sales+420, sales421=sales+421, sales422=sales+422, sales423=sales+423, sales424=sales+424, sales425=sales+425, sales426=sales+426, sales427=sales+427, sales428=sales+428, sales429=sales+429, sales430=sales+430, sales431=sales+431, sales432=sales+432, sales433=sales+433, sales434=sales+434, sales435=sales+435, sales436=sales+436, sales437=sales+437, sales438=sales+438, sales439=sales+439, sales440=sales+440, sales441=sales+441, sales442=sales+442, sales443=sales+443, sales444=sales+444, sales445=sales+445, sales446=sales+446, sales447=sales+447, sales448=sales+448, sales449=sales+449, sales450=sales+450, sales451=sales+451, sales452=sales+452, sales453=sales+453, sales454=sales+454, sales455=sales+455, sales456=sales+456, sales457=sales+457, sales458=sales+458, sales459=sales+459, sales460=sales+460, sales461=sales+461, sales462=sales+462, sales463=sales+463, sales464=sales+464, sales465=sales+465, sales466=sales+466, sales467=sales+467, sales468=sales+468, sales469=sales+469, sales470=sales+470, sales471=sales+471, sales472=sales+472, sales473=sales+473, sales474=sales+474, sales475=sales+475, sales476=sales+476, sales477=sales+477, sales478=sales+478, sales479=sales+479, sales480=sales+480, sales481=sales+481, sales482=sales+482, sales483=sales+483, sales484=sales+484, sales485=sales+485, sales486=sales+486, 
sales487=sales+487, sales488=sales+488, sales489=sales+489, sales490=sales+490, sales491=sales+491, sales492=sales+492, sales493=sales+493, sales494=sales+494, sales495=sales+495, sales496=sales+496, sales497=sales+497, sales498=sales+498, sales499=sales+499, sales500=sales+500, sales501=sales+501, sales502=sales+502, sales503=sales+503, sales504=sales+504, sales505=sales+505, sales506=sales+506, sales507=sales+507, sales508=sales+508, sales509=sales+509, sales510=sales+510, sales511=sales+511, sales512=sales+512, sales513=sales+513, sales514=sales+514, sales515=sales+515, sales516=sales+516, sales517=sales+517, sales518=sales+518, sales519=sales+519, sales520=sales+520, sales521=sales+521, sales522=sales+522, sales523=sales+523, sales524=sales+524, sales525=sales+525, sales526=sales+526, sales527=sales+527, sales528=sales+528, sales529=sales+529, sales530=sales+530, sales531=sales+531, sales532=sales+532, sales533=sales+533, sales534=sales+534, sales535=sales+535, sales536=sales+536, sales537=sales+537, sales538=sales+538, sales539=sales+539, sales540=sales+540, sales541=sales+541, sales542=sales+542, sales543=sales+543, sales544=sales+544, sales545=sales+545, sales546=sales+546, sales547=sales+547, sales548=sales+548, sales549=sales+549, sales550=sales+550, sales551=sales+551, sales552=sales+552, sales553=sales+553, sales554=sales+554, sales555=sales+555, sales556=sales+556, sales557=sales+557, sales558=sales+558, sales559=sales+559, sales560=sales+560, sales561=sales+561, sales562=sales+562, sales563=sales+563, sales564=sales+564, sales565=sales+565, sales566=sales+566, sales567=sales+567, sales568=sales+568, sales569=sales+569, sales570=sales+570, sales571=sales+571, sales572=sales+572, sales573=sales+573, sales574=sales+574, sales575=sales+575, sales576=sales+576, sales577=sales+577, sales578=sales+578, sales579=sales+579, sales580=sales+580, sales581=sales+581, sales582=sales+582, sales583=sales+583, sales584=sales+584, sales585=sales+585, sales586=sales+586, 
sales587=sales+587, sales588=sales+588, sales589=sales+589, sales590=sales+590, sales591=sales+591, sales592=sales+592, sales593=sales+593, sales594=sales+594, sales595=sales+595, sales596=sales+596, sales597=sales+597, sales598=sales+598, sales599=sales+599, sales600=sales+600, sales601=sales+601, sales602=sales+602, sales603=sales+603, sales604=sales+604, sales605=sales+605, sales606=sales+606, sales607=sales+607, sales608=sales+608, sales609=sales+609, sales610=sales+610, sales611=sales+611, sales612=sales+612, sales613=sales+613, sales614=sales+614, sales615=sales+615, sales616=sales+616, sales617=sales+617, sales618=sales+618, sales619=sales+619, sales620=sales+620, sales621=sales+621, sales622=sales+622, sales623=sales+623, sales624=sales+624, sales625=sales+625, sales626=sales+626, sales627=sales+627, sales628=sales+628, sales629=sales+629, sales630=sales+630, sales631=sales+631, sales632=sales+632, sales633=sales+633, sales634=sales+634, sales635=sales+635, sales636=sales+636, sales637=sales+637, sales638=sales+638, sales639=sales+639, sales640=sales+640, sales641=sales+641, sales642=sales+642, sales643=sales+643, sales644=sales+644, sales645=sales+645, sales646=sales+646, sales647=sales+647, sales648=sales+648, sales649=sales+649, sales650=sales+650, sales651=sales+651, sales652=sales+652, sales653=sales+653, sales654=sales+654, sales655=sales+655, sales656=sales+656, sales657=sales+657, sales658=sales+658, sales659=sales+659, sales660=sales+660, sales661=sales+661, sales662=sales+662, sales663=sales+663, sales664=sales+664, sales665=sales+665, sales666=sales+666, sales667=sales+667, sales668=sales+668, sales669=sales+669, sales670=sales+670, sales671=sales+671, sales672=sales+672, sales673=sales+673, sales674=sales+674, sales675=sales+675, sales676=sales+676, sales677=sales+677, sales678=sales+678, sales679=sales+679, sales680=sales+680, sales681=sales+681, sales682=sales+682, sales683=sales+683, sales684=sales+684, sales685=sales+685, sales686=sales+686, 
sales687=sales+687, sales688=sales+688, sales689=sales+689, sales690=sales+690, sales691=sales+691, sales692=sales+692, sales693=sales+693, sales694=sales+694, sales695=sales+695, sales696=sales+696, sales697=sales+697, sales698=sales+698, sales699=sales+699, sales700=sales+700, sales701=sales+701, sales702=sales+702, sales703=sales+703, sales704=sales+704, sales705=sales+705, sales706=sales+706, sales707=sales+707, sales708=sales+708, sales709=sales+709, sales710=sales+710, sales711=sales+711, sales712=sales+712, sales713=sales+713, sales714=sales+714, sales715=sales+715, sales716=sales+716, sales717=sales+717, sales718=sales+718, sales719=sales+719, sales720=sales+720, sales721=sales+721, sales722=sales+722, sales723=sales+723, sales724=sales+724, sales725=sales+725, sales726=sales+726, sales727=sales+727, sales728=sales+728, sales729=sales+729, sales730=sales+730, sales731=sales+731, sales732=sales+732, sales733=sales+733, sales734=sales+734, sales735=sales+735, sales736=sales+736, sales737=sales+737, sales738=sales+738, sales739=sales+739, sales740=sales+740, sales741=sales+741, sales742=sales+742, sales743=sales+743, sales744=sales+744, sales745=sales+745, sales746=sales+746, sales747=sales+747, sales748=sales+748, sales749=sales+749, sales750=sales+750, sales751=sales+751, sales752=sales+752, sales753=sales+753, sales754=sales+754, sales755=sales+755, sales756=sales+756, sales757=sales+757, sales758=sales+758, sales759=sales+759, sales760=sales+760, sales761=sales+761, sales762=sales+762, sales763=sales+763, sales764=sales+764, sales765=sales+765, sales766=sales+766, sales767=sales+767, sales768=sales+768, sales769=sales+769, sales770=sales+770, sales771=sales+771, sales772=sales+772, sales773=sales+773, sales774=sales+774, sales775=sales+775, sales776=sales+776, sales777=sales+777, sales778=sales+778, sales779=sales+779, sales780=sales+780, sales781=sales+781, sales782=sales+782, sales783=sales+783, sales784=sales+784, sales785=sales+785, sales786=sales+786, 
sales787=sales+787, sales788=sales+788, sales789=sales+789, sales790=sales+790, sales791=sales+791, sales792=sales+792, sales793=sales+793, sales794=sales+794, sales795=sales+795, sales796=sales+796, sales797=sales+797, sales798=sales+798, sales799=sales+799, sales800=sales+800, sales801=sales+801, sales802=sales+802, sales803=sales+803, sales804=sales+804, sales805=sales+805, sales806=sales+806, sales807=sales+807, sales808=sales+808, sales809=sales+809, sales810=sales+810, sales811=sales+811, sales812=sales+812, sales813=sales+813, sales814=sales+814, sales815=sales+815, sales816=sales+816, sales817=sales+817, sales818=sales+818, sales819=sales+819, sales820=sales+820, sales821=sales+821, sales822=sales+822, sales823=sales+823, sales824=sales+824, sales825=sales+825, sales826=sales+826, sales827=sales+827, sales828=sales+828, sales829=sales+829, sales830=sales+830, sales831=sales+831, sales832=sales+832, sales833=sales+833, sales834=sales+834, sales835=sales+835, sales836=sales+836, sales837=sales+837, sales838=sales+838, sales839=sales+839, sales840=sales+840, sales841=sales+841, sales842=sales+842, sales843=sales+843, sales844=sales+844, sales845=sales+845, sales846=sales+846, sales847=sales+847, sales848=sales+848, sales849=sales+849, sales850=sales+850, sales851=sales+851, sales852=sales+852, sales853=sales+853, sales854=sales+854, sales855=sales+855, sales856=sales+856, sales857=sales+857, sales858=sales+858, sales859=sales+859, sales860=sales+860, sales861=sales+861, sales862=sales+862, sales863=sales+863, sales864=sales+864, sales865=sales+865, sales866=sales+866, sales867=sales+867, sales868=sales+868, sales869=sales+869, sales870=sales+870, sales871=sales+871, sales872=sales+872, sales873=sales+873, sales874=sales+874, sales875=sales+875, sales876=sales+876, sales877=sales+877, sales878=sales+878, sales879=sales+879, sales880=sales+880, sales881=sales+881, sales882=sales+882, sales883=sales+883, sales884=sales+884, sales885=sales+885, sales886=sales+886, 
sales887=sales+887, sales888=sales+888, sales889=sales+889, sales890=sales+890, sales891=sales+891, sales892=sales+892, sales893=sales+893, sales894=sales+894, sales895=sales+895, sales896=sales+896, sales897=sales+897, sales898=sales+898, sales899=sales+899, sales900=sales+900, sales901=sales+901, sales902=sales+902, sales903=sales+903, sales904=sales+904, sales905=sales+905, sales906=sales+906, sales907=sales+907, sales908=sales+908, sales909=sales+909, sales910=sales+910, sales911=sales+911, sales912=sales+912, sales913=sales+913, sales914=sales+914, sales915=sales+915, sales916=sales+916, sales917=sales+917, sales918=sales+918, sales919=sales+919, sales920=sales+920, sales921=sales+921, sales922=sales+922, sales923=sales+923, sales924=sales+924, sales925=sales+925, sales926=sales+926, sales927=sales+927, sales928=sales+928, sales929=sales+929, sales930=sales+930, sales931=sales+931, sales932=sales+932, sales933=sales+933, sales934=sales+934, sales935=sales+935, sales936=sales+936, sales937=sales+937, sales938=sales+938, sales939=sales+939, sales940=sales+940, sales941=sales+941, sales942=sales+942, sales943=sales+943, sales944=sales+944, sales945=sales+945, sales946=sales+946, sales947=sales+947, sales948=sales+948, sales949=sales+949, sales950=sales+950, sales951=sales+951, sales952=sales+952, sales953=sales+953, sales954=sales+954, sales955=sales+955, sales956=sales+956, sales957=sales+957, sales958=sales+958, sales959=sales+959, sales960=sales+960, sales961=sales+961, sales962=sales+962, sales963=sales+963, sales964=sales+964, sales965=sales+965, sales966=sales+966, sales967=sales+967, sales968=sales+968, sales969=sales+969, sales970=sales+970, sales971=sales+971, sales972=sales+972, sales973=sales+973, sales974=sales+974, sales975=sales+975, sales976=sales+976, sales977=sales+977, sales978=sales+978, sales979=sales+979, sales980=sales+980, sales981=sales+981, sales982=sales+982, sales983=sales+983, sales984=sales+984, sales985=sales+985, sales986=sales+986, 
sales987=sales+987, sales988=sales+988, sales989=sales+989, sales990=sales+990, sales991=sales+991, sales992=sales+992, sales993=sales+993, sales994=sales+994, sales995=sales+995, sales996=sales+996, sales997=sales+997, sales998=sales+998, sales999=sales+999, sales1000=sales+1000 | stats sum(sales) as sum0, sum(sales1) as sum1, sum(sales2) as sum2, sum(sales3) as sum3, sum(sales4) as sum4, sum(sales5) as sum5, sum(sales6) as sum6, sum(sales7) as sum7, sum(sales8) as sum8, sum(sales9) as sum9, sum(sales10) as sum10, sum(sales11) as sum11, sum(sales12) as sum12, sum(sales13) as sum13, sum(sales14) as sum14, sum(sales15) as sum15, sum(sales16) as sum16, sum(sales17) as sum17, sum(sales18) as sum18, sum(sales19) as sum19, sum(sales20) as sum20, sum(sales21) as sum21, sum(sales22) as sum22, sum(sales23) as sum23, sum(sales24) as sum24, sum(sales25) as sum25, sum(sales26) as sum26, sum(sales27) as sum27, sum(sales28) as sum28, sum(sales29) as sum29, sum(sales30) as sum30, sum(sales31) as sum31, sum(sales32) as sum32, sum(sales33) as sum33, sum(sales34) as sum34, sum(sales35) as sum35, sum(sales36) as sum36, sum(sales37) as sum37, sum(sales38) as sum38, sum(sales39) as sum39, sum(sales40) as sum40, sum(sales41) as sum41, sum(sales42) as sum42, sum(sales43) as sum43, sum(sales44) as sum44, sum(sales45) as sum45, sum(sales46) as sum46, sum(sales47) as sum47, sum(sales48) as sum48, sum(sales49) as sum49, sum(sales50) as sum50, sum(sales51) as sum51, sum(sales52) as sum52, sum(sales53) as sum53, sum(sales54) as sum54, sum(sales55) as sum55, sum(sales56) as sum56, sum(sales57) as sum57, sum(sales58) as sum58, sum(sales59) as sum59, sum(sales60) as sum60, sum(sales61) as sum61, sum(sales62) as sum62, sum(sales63) as sum63, sum(sales64) as sum64, sum(sales65) as sum65, sum(sales66) as sum66, sum(sales67) as sum67, sum(sales68) as sum68, sum(sales69) as sum69, sum(sales70) as sum70, sum(sales71) as sum71, sum(sales72) as sum72, sum(sales73) as sum73, sum(sales74) as sum74, 
sum(sales75) as sum75, sum(sales76) as sum76, sum(sales77) as sum77, sum(sales78) as sum78, sum(sales79) as sum79, sum(sales80) as sum80, sum(sales81) as sum81, sum(sales82) as sum82, sum(sales83) as sum83, sum(sales84) as sum84, sum(sales85) as sum85, sum(sales86) as sum86, sum(sales87) as sum87, sum(sales88) as sum88, sum(sales89) as sum89, sum(sales90) as sum90, sum(sales91) as sum91, sum(sales92) as sum92, sum(sales93) as sum93, sum(sales94) as sum94, sum(sales95) as sum95, sum(sales96) as sum96, sum(sales97) as sum97, sum(sales98) as sum98, sum(sales99) as sum99, sum(sales100) as sum100, sum(sales101) as sum101, sum(sales102) as sum102, sum(sales103) as sum103, sum(sales104) as sum104, sum(sales105) as sum105, sum(sales106) as sum106, sum(sales107) as sum107, sum(sales108) as sum108, sum(sales109) as sum109, sum(sales110) as sum110, sum(sales111) as sum111, sum(sales112) as sum112, sum(sales113) as sum113, sum(sales114) as sum114, sum(sales115) as sum115, sum(sales116) as sum116, sum(sales117) as sum117, sum(sales118) as sum118, sum(sales119) as sum119, sum(sales120) as sum120, sum(sales121) as sum121, sum(sales122) as sum122, sum(sales123) as sum123, sum(sales124) as sum124, sum(sales125) as sum125, sum(sales126) as sum126, sum(sales127) as sum127, sum(sales128) as sum128, sum(sales129) as sum129, sum(sales130) as sum130, sum(sales131) as sum131, sum(sales132) as sum132, sum(sales133) as sum133, sum(sales134) as sum134, sum(sales135) as sum135, sum(sales136) as sum136, sum(sales137) as sum137, sum(sales138) as sum138, sum(sales139) as sum139, sum(sales140) as sum140, sum(sales141) as sum141, sum(sales142) as sum142, sum(sales143) as sum143, sum(sales144) as sum144, sum(sales145) as sum145, sum(sales146) as sum146, sum(sales147) as sum147, sum(sales148) as sum148, sum(sales149) as sum149, sum(sales150) as sum150, sum(sales151) as sum151, sum(sales152) as sum152, sum(sales153) as sum153, sum(sales154) as sum154, sum(sales155) as sum155, sum(sales156) as sum156, 
sum(sales157) as sum157, sum(sales158) as sum158, sum(sales159) as sum159, sum(sales160) as sum160, sum(sales161) as sum161, sum(sales162) as sum162, sum(sales163) as sum163, sum(sales164) as sum164, sum(sales165) as sum165, sum(sales166) as sum166, sum(sales167) as sum167, sum(sales168) as sum168, sum(sales169) as sum169, sum(sales170) as sum170, sum(sales171) as sum171, sum(sales172) as sum172, sum(sales173) as sum173, sum(sales174) as sum174, sum(sales175) as sum175, sum(sales176) as sum176, sum(sales177) as sum177, sum(sales178) as sum178, sum(sales179) as sum179, sum(sales180) as sum180, sum(sales181) as sum181, sum(sales182) as sum182, sum(sales183) as sum183, sum(sales184) as sum184, sum(sales185) as sum185, sum(sales186) as sum186, sum(sales187) as sum187, sum(sales188) as sum188, sum(sales189) as sum189, sum(sales190) as sum190, sum(sales191) as sum191, sum(sales192) as sum192, sum(sales193) as sum193, sum(sales194) as sum194, sum(sales195) as sum195, sum(sales196) as sum196, sum(sales197) as sum197, sum(sales198) as sum198, sum(sales199) as sum199, sum(sales200) as sum200, sum(sales201) as sum201, sum(sales202) as sum202, sum(sales203) as sum203, sum(sales204) as sum204, sum(sales205) as sum205, sum(sales206) as sum206, sum(sales207) as sum207, sum(sales208) as sum208, sum(sales209) as sum209, sum(sales210) as sum210, sum(sales211) as sum211, sum(sales212) as sum212, sum(sales213) as sum213, sum(sales214) as sum214, sum(sales215) as sum215, sum(sales216) as sum216, sum(sales217) as sum217, sum(sales218) as sum218, sum(sales219) as sum219, sum(sales220) as sum220, sum(sales221) as sum221, sum(sales222) as sum222, sum(sales223) as sum223, sum(sales224) as sum224, sum(sales225) as sum225, sum(sales226) as sum226, sum(sales227) as sum227, sum(sales228) as sum228, sum(sales229) as sum229, sum(sales230) as sum230, sum(sales231) as sum231, sum(sales232) as sum232, sum(sales233) as sum233, sum(sales234) as sum234, sum(sales235) as sum235, sum(sales236) as sum236, 
sum(sales237) as sum237, sum(sales238) as sum238, sum(sales239) as sum239, sum(sales240) as sum240, sum(sales241) as sum241, sum(sales242) as sum242, sum(sales243) as sum243, sum(sales244) as sum244, sum(sales245) as sum245, sum(sales246) as sum246, sum(sales247) as sum247, sum(sales248) as sum248, sum(sales249) as sum249, sum(sales250) as sum250, sum(sales251) as sum251, sum(sales252) as sum252, sum(sales253) as sum253, sum(sales254) as sum254, sum(sales255) as sum255, sum(sales256) as sum256, sum(sales257) as sum257, sum(sales258) as sum258, sum(sales259) as sum259, sum(sales260) as sum260, sum(sales261) as sum261, sum(sales262) as sum262, sum(sales263) as sum263, sum(sales264) as sum264, sum(sales265) as sum265, sum(sales266) as sum266, sum(sales267) as sum267, sum(sales268) as sum268, sum(sales269) as sum269, sum(sales270) as sum270, sum(sales271) as sum271, sum(sales272) as sum272, sum(sales273) as sum273, sum(sales274) as sum274, sum(sales275) as sum275, sum(sales276) as sum276, sum(sales277) as sum277, sum(sales278) as sum278, sum(sales279) as sum279, sum(sales280) as sum280, sum(sales281) as sum281, sum(sales282) as sum282, sum(sales283) as sum283, sum(sales284) as sum284, sum(sales285) as sum285, sum(sales286) as sum286, sum(sales287) as sum287, sum(sales288) as sum288, sum(sales289) as sum289, sum(sales290) as sum290, sum(sales291) as sum291, sum(sales292) as sum292, sum(sales293) as sum293, sum(sales294) as sum294, sum(sales295) as sum295, sum(sales296) as sum296, sum(sales297) as sum297, sum(sales298) as sum298, sum(sales299) as sum299, sum(sales300) as sum300, sum(sales301) as sum301, sum(sales302) as sum302, sum(sales303) as sum303, sum(sales304) as sum304, sum(sales305) as sum305, sum(sales306) as sum306, sum(sales307) as sum307, sum(sales308) as sum308, sum(sales309) as sum309, sum(sales310) as sum310, sum(sales311) as sum311, sum(sales312) as sum312, sum(sales313) as sum313, sum(sales314) as sum314, sum(sales315) as sum315, sum(sales316) as sum316, 
sum(sales317) as sum317, sum(sales318) as sum318, sum(sales319) as sum319, sum(sales320) as sum320, sum(sales321) as sum321, sum(sales322) as sum322, sum(sales323) as sum323, sum(sales324) as sum324, sum(sales325) as sum325, sum(sales326) as sum326, sum(sales327) as sum327, sum(sales328) as sum328, sum(sales329) as sum329, sum(sales330) as sum330, sum(sales331) as sum331, sum(sales332) as sum332, sum(sales333) as sum333, sum(sales334) as sum334, sum(sales335) as sum335, sum(sales336) as sum336, sum(sales337) as sum337, sum(sales338) as sum338, sum(sales339) as sum339, sum(sales340) as sum340, sum(sales341) as sum341, sum(sales342) as sum342, sum(sales343) as sum343, sum(sales344) as sum344, sum(sales345) as sum345, sum(sales346) as sum346, sum(sales347) as sum347, sum(sales348) as sum348, sum(sales349) as sum349, sum(sales350) as sum350, sum(sales351) as sum351, sum(sales352) as sum352, sum(sales353) as sum353, sum(sales354) as sum354, sum(sales355) as sum355, sum(sales356) as sum356, sum(sales357) as sum357, sum(sales358) as sum358, sum(sales359) as sum359, sum(sales360) as sum360, sum(sales361) as sum361, sum(sales362) as sum362, sum(sales363) as sum363, sum(sales364) as sum364, sum(sales365) as sum365, sum(sales366) as sum366, sum(sales367) as sum367, sum(sales368) as sum368, sum(sales369) as sum369, sum(sales370) as sum370, sum(sales371) as sum371, sum(sales372) as sum372, sum(sales373) as sum373, sum(sales374) as sum374, sum(sales375) as sum375, sum(sales376) as sum376, sum(sales377) as sum377, sum(sales378) as sum378, sum(sales379) as sum379, sum(sales380) as sum380, sum(sales381) as sum381, sum(sales382) as sum382, sum(sales383) as sum383, sum(sales384) as sum384, sum(sales385) as sum385, sum(sales386) as sum386, sum(sales387) as sum387, sum(sales388) as sum388, sum(sales389) as sum389, sum(sales390) as sum390, sum(sales391) as sum391, sum(sales392) as sum392, sum(sales393) as sum393, sum(sales394) as sum394, sum(sales395) as sum395, sum(sales396) as sum396, 
sum(sales397) as sum397, sum(sales398) as sum398, sum(sales399) as sum399, sum(sales400) as sum400, sum(sales401) as sum401, sum(sales402) as sum402, sum(sales403) as sum403, sum(sales404) as sum404, sum(sales405) as sum405, sum(sales406) as sum406, sum(sales407) as sum407, sum(sales408) as sum408, sum(sales409) as sum409, sum(sales410) as sum410, sum(sales411) as sum411, sum(sales412) as sum412, sum(sales413) as sum413, sum(sales414) as sum414, sum(sales415) as sum415, sum(sales416) as sum416, sum(sales417) as sum417, sum(sales418) as sum418, sum(sales419) as sum419, sum(sales420) as sum420, sum(sales421) as sum421, sum(sales422) as sum422, sum(sales423) as sum423, sum(sales424) as sum424, sum(sales425) as sum425, sum(sales426) as sum426, sum(sales427) as sum427, sum(sales428) as sum428, sum(sales429) as sum429, sum(sales430) as sum430, sum(sales431) as sum431, sum(sales432) as sum432, sum(sales433) as sum433, sum(sales434) as sum434, sum(sales435) as sum435, sum(sales436) as sum436, sum(sales437) as sum437, sum(sales438) as sum438, sum(sales439) as sum439, sum(sales440) as sum440, sum(sales441) as sum441, sum(sales442) as sum442, sum(sales443) as sum443, sum(sales444) as sum444, sum(sales445) as sum445, sum(sales446) as sum446, sum(sales447) as sum447, sum(sales448) as sum448, sum(sales449) as sum449, sum(sales450) as sum450, sum(sales451) as sum451, sum(sales452) as sum452, sum(sales453) as sum453, sum(sales454) as sum454, sum(sales455) as sum455, sum(sales456) as sum456, sum(sales457) as sum457, sum(sales458) as sum458, sum(sales459) as sum459, sum(sales460) as sum460, sum(sales461) as sum461, sum(sales462) as sum462, sum(sales463) as sum463, sum(sales464) as sum464, sum(sales465) as sum465, sum(sales466) as sum466, sum(sales467) as sum467, sum(sales468) as sum468, sum(sales469) as sum469, sum(sales470) as sum470, sum(sales471) as sum471, sum(sales472) as sum472, sum(sales473) as sum473, sum(sales474) as sum474, sum(sales475) as sum475, sum(sales476) as sum476, 
sum(sales477) as sum477, sum(sales478) as sum478, sum(sales479) as sum479, sum(sales480) as sum480, sum(sales481) as sum481, sum(sales482) as sum482, sum(sales483) as sum483, sum(sales484) as sum484, sum(sales485) as sum485, sum(sales486) as sum486, sum(sales487) as sum487, sum(sales488) as sum488, sum(sales489) as sum489, sum(sales490) as sum490, sum(sales491) as sum491, sum(sales492) as sum492, sum(sales493) as sum493, sum(sales494) as sum494, sum(sales495) as sum495, sum(sales496) as sum496, sum(sales497) as sum497, sum(sales498) as sum498, sum(sales499) as sum499, sum(sales500) as sum500, sum(sales501) as sum501, sum(sales502) as sum502, sum(sales503) as sum503, sum(sales504) as sum504, sum(sales505) as sum505, sum(sales506) as sum506, sum(sales507) as sum507, sum(sales508) as sum508, sum(sales509) as sum509, sum(sales510) as sum510, sum(sales511) as sum511, sum(sales512) as sum512, sum(sales513) as sum513, sum(sales514) as sum514, sum(sales515) as sum515, sum(sales516) as sum516, sum(sales517) as sum517, sum(sales518) as sum518, sum(sales519) as sum519, sum(sales520) as sum520, sum(sales521) as sum521, sum(sales522) as sum522, sum(sales523) as sum523, sum(sales524) as sum524, sum(sales525) as sum525, sum(sales526) as sum526, sum(sales527) as sum527, sum(sales528) as sum528, sum(sales529) as sum529, sum(sales530) as sum530, sum(sales531) as sum531, sum(sales532) as sum532, sum(sales533) as sum533, sum(sales534) as sum534, sum(sales535) as sum535, sum(sales536) as sum536, sum(sales537) as sum537, sum(sales538) as sum538, sum(sales539) as sum539, sum(sales540) as sum540, sum(sales541) as sum541, sum(sales542) as sum542, sum(sales543) as sum543, sum(sales544) as sum544, sum(sales545) as sum545, sum(sales546) as sum546, sum(sales547) as sum547, sum(sales548) as sum548, sum(sales549) as sum549, sum(sales550) as sum550, sum(sales551) as sum551, sum(sales552) as sum552, sum(sales553) as sum553, sum(sales554) as sum554, sum(sales555) as sum555, sum(sales556) as sum556, 
sum(sales557) as sum557, sum(sales558) as sum558, sum(sales559) as sum559, sum(sales560) as sum560, sum(sales561) as sum561, sum(sales562) as sum562, sum(sales563) as sum563, sum(sales564) as sum564, sum(sales565) as sum565, sum(sales566) as sum566, sum(sales567) as sum567, sum(sales568) as sum568, sum(sales569) as sum569, sum(sales570) as sum570, sum(sales571) as sum571, sum(sales572) as sum572, sum(sales573) as sum573, sum(sales574) as sum574, sum(sales575) as sum575, sum(sales576) as sum576, sum(sales577) as sum577, sum(sales578) as sum578, sum(sales579) as sum579, sum(sales580) as sum580, sum(sales581) as sum581, sum(sales582) as sum582, sum(sales583) as sum583, sum(sales584) as sum584, sum(sales585) as sum585, sum(sales586) as sum586, sum(sales587) as sum587, sum(sales588) as sum588, sum(sales589) as sum589, sum(sales590) as sum590, sum(sales591) as sum591, sum(sales592) as sum592, sum(sales593) as sum593, sum(sales594) as sum594, sum(sales595) as sum595, sum(sales596) as sum596, sum(sales597) as sum597, sum(sales598) as sum598, sum(sales599) as sum599, sum(sales600) as sum600, sum(sales601) as sum601, sum(sales602) as sum602, sum(sales603) as sum603, sum(sales604) as sum604, sum(sales605) as sum605, sum(sales606) as sum606, sum(sales607) as sum607, sum(sales608) as sum608, sum(sales609) as sum609, sum(sales610) as sum610, sum(sales611) as sum611, sum(sales612) as sum612, sum(sales613) as sum613, sum(sales614) as sum614, sum(sales615) as sum615, sum(sales616) as sum616, sum(sales617) as sum617, sum(sales618) as sum618, sum(sales619) as sum619, sum(sales620) as sum620, sum(sales621) as sum621, sum(sales622) as sum622, sum(sales623) as sum623, sum(sales624) as sum624, sum(sales625) as sum625, sum(sales626) as sum626, sum(sales627) as sum627, sum(sales628) as sum628, sum(sales629) as sum629, sum(sales630) as sum630, sum(sales631) as sum631, sum(sales632) as sum632, sum(sales633) as sum633, sum(sales634) as sum634, sum(sales635) as sum635, sum(sales636) as sum636, 
sum(sales637) as sum637, sum(sales638) as sum638, sum(sales639) as sum639, sum(sales640) as sum640, sum(sales641) as sum641, sum(sales642) as sum642, sum(sales643) as sum643, sum(sales644) as sum644, sum(sales645) as sum645, sum(sales646) as sum646, sum(sales647) as sum647, sum(sales648) as sum648, sum(sales649) as sum649, sum(sales650) as sum650, sum(sales651) as sum651, sum(sales652) as sum652, sum(sales653) as sum653, sum(sales654) as sum654, sum(sales655) as sum655, sum(sales656) as sum656, sum(sales657) as sum657, sum(sales658) as sum658, sum(sales659) as sum659, sum(sales660) as sum660, sum(sales661) as sum661, sum(sales662) as sum662, sum(sales663) as sum663, sum(sales664) as sum664, sum(sales665) as sum665, sum(sales666) as sum666, sum(sales667) as sum667, sum(sales668) as sum668, sum(sales669) as sum669, sum(sales670) as sum670, sum(sales671) as sum671, sum(sales672) as sum672, sum(sales673) as sum673, sum(sales674) as sum674, sum(sales675) as sum675, sum(sales676) as sum676, sum(sales677) as sum677, sum(sales678) as sum678, sum(sales679) as sum679, sum(sales680) as sum680, sum(sales681) as sum681, sum(sales682) as sum682, sum(sales683) as sum683, sum(sales684) as sum684, sum(sales685) as sum685, sum(sales686) as sum686, sum(sales687) as sum687, sum(sales688) as sum688, sum(sales689) as sum689, sum(sales690) as sum690, sum(sales691) as sum691, sum(sales692) as sum692, sum(sales693) as sum693, sum(sales694) as sum694, sum(sales695) as sum695, sum(sales696) as sum696, sum(sales697) as sum697, sum(sales698) as sum698, sum(sales699) as sum699, sum(sales700) as sum700, sum(sales701) as sum701, sum(sales702) as sum702, sum(sales703) as sum703, sum(sales704) as sum704, sum(sales705) as sum705, sum(sales706) as sum706, sum(sales707) as sum707, sum(sales708) as sum708, sum(sales709) as sum709, sum(sales710) as sum710, sum(sales711) as sum711, sum(sales712) as sum712, sum(sales713) as sum713, sum(sales714) as sum714, sum(sales715) as sum715, sum(sales716) as sum716, 
sum(sales717) as sum717, sum(sales718) as sum718, sum(sales719) as sum719, sum(sales720) as sum720, sum(sales721) as sum721, sum(sales722) as sum722, sum(sales723) as sum723, sum(sales724) as sum724, sum(sales725) as sum725, sum(sales726) as sum726, sum(sales727) as sum727, sum(sales728) as sum728, sum(sales729) as sum729, sum(sales730) as sum730, sum(sales731) as sum731, sum(sales732) as sum732, sum(sales733) as sum733, sum(sales734) as sum734, sum(sales735) as sum735, sum(sales736) as sum736, sum(sales737) as sum737, sum(sales738) as sum738, sum(sales739) as sum739, sum(sales740) as sum740, sum(sales741) as sum741, sum(sales742) as sum742, sum(sales743) as sum743, sum(sales744) as sum744, sum(sales745) as sum745, sum(sales746) as sum746, sum(sales747) as sum747, sum(sales748) as sum748, sum(sales749) as sum749, sum(sales750) as sum750, sum(sales751) as sum751, sum(sales752) as sum752, sum(sales753) as sum753, sum(sales754) as sum754, sum(sales755) as sum755, sum(sales756) as sum756, sum(sales757) as sum757, sum(sales758) as sum758, sum(sales759) as sum759, sum(sales760) as sum760, sum(sales761) as sum761, sum(sales762) as sum762, sum(sales763) as sum763, sum(sales764) as sum764, sum(sales765) as sum765, sum(sales766) as sum766, sum(sales767) as sum767, sum(sales768) as sum768, sum(sales769) as sum769, sum(sales770) as sum770, sum(sales771) as sum771, sum(sales772) as sum772, sum(sales773) as sum773, sum(sales774) as sum774, sum(sales775) as sum775, sum(sales776) as sum776, sum(sales777) as sum777, sum(sales778) as sum778, sum(sales779) as sum779, sum(sales780) as sum780, sum(sales781) as sum781, sum(sales782) as sum782, sum(sales783) as sum783, sum(sales784) as sum784, sum(sales785) as sum785, sum(sales786) as sum786, sum(sales787) as sum787, sum(sales788) as sum788, sum(sales789) as sum789, sum(sales790) as sum790, sum(sales791) as sum791, sum(sales792) as sum792, sum(sales793) as sum793, sum(sales794) as sum794, sum(sales795) as sum795, sum(sales796) as sum796, 
sum(sales797) as sum797, sum(sales798) as sum798, sum(sales799) as sum799, sum(sales800) as sum800, sum(sales801) as sum801, sum(sales802) as sum802, sum(sales803) as sum803, sum(sales804) as sum804, sum(sales805) as sum805, sum(sales806) as sum806, sum(sales807) as sum807, sum(sales808) as sum808, sum(sales809) as sum809, sum(sales810) as sum810, sum(sales811) as sum811, sum(sales812) as sum812, sum(sales813) as sum813, sum(sales814) as sum814, sum(sales815) as sum815, sum(sales816) as sum816, sum(sales817) as sum817, sum(sales818) as sum818, sum(sales819) as sum819, sum(sales820) as sum820, sum(sales821) as sum821, sum(sales822) as sum822, sum(sales823) as sum823, sum(sales824) as sum824, sum(sales825) as sum825, sum(sales826) as sum826, sum(sales827) as sum827, sum(sales828) as sum828, sum(sales829) as sum829, sum(sales830) as sum830, sum(sales831) as sum831, sum(sales832) as sum832, sum(sales833) as sum833, sum(sales834) as sum834, sum(sales835) as sum835, sum(sales836) as sum836, sum(sales837) as sum837, sum(sales838) as sum838, sum(sales839) as sum839, sum(sales840) as sum840, sum(sales841) as sum841, sum(sales842) as sum842, sum(sales843) as sum843, sum(sales844) as sum844, sum(sales845) as sum845, sum(sales846) as sum846, sum(sales847) as sum847, sum(sales848) as sum848, sum(sales849) as sum849, sum(sales850) as sum850, sum(sales851) as sum851, sum(sales852) as sum852, sum(sales853) as sum853, sum(sales854) as sum854, sum(sales855) as sum855, sum(sales856) as sum856, sum(sales857) as sum857, sum(sales858) as sum858, sum(sales859) as sum859, sum(sales860) as sum860, sum(sales861) as sum861, sum(sales862) as sum862, sum(sales863) as sum863, sum(sales864) as sum864, sum(sales865) as sum865, sum(sales866) as sum866, sum(sales867) as sum867, sum(sales868) as sum868, sum(sales869) as sum869, sum(sales870) as sum870, sum(sales871) as sum871, sum(sales872) as sum872, sum(sales873) as sum873, sum(sales874) as sum874, sum(sales875) as sum875, sum(sales876) as sum876, 
sum(sales877) as sum877, sum(sales878) as sum878, sum(sales879) as sum879, sum(sales880) as sum880, sum(sales881) as sum881, sum(sales882) as sum882, sum(sales883) as sum883, sum(sales884) as sum884, sum(sales885) as sum885, sum(sales886) as sum886, sum(sales887) as sum887, sum(sales888) as sum888, sum(sales889) as sum889, sum(sales890) as sum890, sum(sales891) as sum891, sum(sales892) as sum892, sum(sales893) as sum893, sum(sales894) as sum894, sum(sales895) as sum895, sum(sales896) as sum896, sum(sales897) as sum897, sum(sales898) as sum898, sum(sales899) as sum899, sum(sales900) as sum900, sum(sales901) as sum901, sum(sales902) as sum902, sum(sales903) as sum903, sum(sales904) as sum904, sum(sales905) as sum905, sum(sales906) as sum906, sum(sales907) as sum907, sum(sales908) as sum908, sum(sales909) as sum909, sum(sales910) as sum910, sum(sales911) as sum911, sum(sales912) as sum912, sum(sales913) as sum913, sum(sales914) as sum914, sum(sales915) as sum915, sum(sales916) as sum916, sum(sales917) as sum917, sum(sales918) as sum918, sum(sales919) as sum919, sum(sales920) as sum920, sum(sales921) as sum921, sum(sales922) as sum922, sum(sales923) as sum923, sum(sales924) as sum924, sum(sales925) as sum925, sum(sales926) as sum926, sum(sales927) as sum927, sum(sales928) as sum928, sum(sales929) as sum929, sum(sales930) as sum930, sum(sales931) as sum931, sum(sales932) as sum932, sum(sales933) as sum933, sum(sales934) as sum934, sum(sales935) as sum935, sum(sales936) as sum936, sum(sales937) as sum937, sum(sales938) as sum938, sum(sales939) as sum939, sum(sales940) as sum940, sum(sales941) as sum941, sum(sales942) as sum942, sum(sales943) as sum943, sum(sales944) as sum944, sum(sales945) as sum945, sum(sales946) as sum946, sum(sales947) as sum947, sum(sales948) as sum948, sum(sales949) as sum949, sum(sales950) as sum950, sum(sales951) as sum951, sum(sales952) as sum952, sum(sales953) as sum953, sum(sales954) as sum954, sum(sales955) as sum955, sum(sales956) as sum956, 
sum(sales957) as sum957, sum(sales958) as sum958, sum(sales959) as sum959, sum(sales960) as sum960, sum(sales961) as sum961, sum(sales962) as sum962, sum(sales963) as sum963, sum(sales964) as sum964, sum(sales965) as sum965, sum(sales966) as sum966, sum(sales967) as sum967, sum(sales968) as sum968, sum(sales969) as sum969, sum(sales970) as sum970, sum(sales971) as sum971, sum(sales972) as sum972, sum(sales973) as sum973, sum(sales974) as sum974, sum(sales975) as sum975, sum(sales976) as sum976, sum(sales977) as sum977, sum(sales978) as sum978, sum(sales979) as sum979, sum(sales980) as sum980, sum(sales981) as sum981, sum(sales982) as sum982, sum(sales983) as sum983, sum(sales984) as sum984, sum(sales985) as sum985, sum(sales986) as sum986, sum(sales987) as sum987, sum(sales988) as sum988, sum(sales989) as sum989, sum(sales990) as sum990, sum(sales991) as sum991, sum(sales992) as sum992, sum(sales993) as sum993, sum(sales994) as sum994, sum(sales995) as sum995, sum(sales996) as sum996, sum(sales997) as sum997, sum(sales998) as sum998, sum(sales999) as sum999, sum(sales1000) as sum1000" +} +``` + + + + +### @qianheng-aws + + +> I try a more complex one, it works + +Single q30 query can run successfully as well on my local test. It won't always trigger that exception. + +As seen in the CI before ignoring q30, https://github.com/opensearch-project/sql/actions/runs/16497630130/job/46647212530, some tests can pass while others failed with exception. + +``` +PPLClickBenchIT > test FAILED + org.opensearch.client.ResponseException: method [POST], host [http://127.0.0.1:61272/], URI [/_plugins/_ppl], status line [HTTP/1.1 500 Internal Server Error] + { + "error": { + "reason": "There was internal problem at backend", + "details": "insufficient resources to run the query, quit.", + "type": "IllegalStateException" + }, + "status": 500 + } +``` + +I think it depends on the tension of the node on the specific point, maybe impacted by JVM GC. 
+ + +### @penghuo + + + + + + +> > I try a more complex one, it works +> +> Single q30 query can run successfully as well on my local test. It won't always trigger that exception. +> +> As seen in the CI before ignoring q30, https://github.com/opensearch-project/sql/actions/runs/16497630130/job/46647212530, some tests can pass while others failed with exception. +> +> ``` +> PPLClickBenchIT > test FAILED +> org.opensearch.client.ResponseException: method [POST], host [http://127.0.0.1:61272/], URI [/_plugins/_ppl], status line [HTTP/1.1 500 Internal Server Error] +> { +> "error": { +> "reason": "There was internal problem at backend", +> "details": "insufficient resources to run the query, quit.", +> "type": "IllegalStateException" +> }, +> "status": 500 +> } +> ``` +> +> I think it depends on the tension of the node on the specific point, maybe impacted by JVM GC. + +Script push down will trigger exception. We can track the solution in seperate PR. +``` + { + "error": { + "reason": "Error occurred in OpenSearch engine: all shards failed", + "details": "Shard[0]: CircuitBreakingException[[script] Too many dynamic script compilations within, max: [75/5m]; please use indexed, or scripts with parameters instead; this limit can be changed by the [script.context.aggs.max_compilations_rate] setting]\n\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", + "type": "SearchPhaseExecutionException" + }, + "status": 429 + } +``` + + +--- + +# PR #3915: Convert like function call to wildcard query for Calcite filter pushdown + +**URL:** https://github.com/opensearch-project/sql/pull/3915 + +**Author:** @songkant-aws + +**Created:** 2025-07-23T10:23:47Z + +**State:** MERGED + +**Merged:** 2025-07-31T03:11:18Z + +**Changes:** +267 -82 (21 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Enhancement: + +In V2 pushdown logic, like function 
is translated to OpenSearch DSL wildcard query but it's converted to UDF script in Calcite filter pushdown. This change makes sure Calcite has parity functionality. + +Text field wildcard query has inconsistent behavior with SQL like function. So this PR also proposes to not pushdown like query for text field. Instead, we should only support pushdown like query for keyword field. + +### Related Issues +Resolves #3428 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java:67` + + +Question: +With the patch, both in v2 and v3, predicate `WHERE Like(TextKeywordBody, 'test%')` will trigger wildcard query pushdown but predicate `WHERE Like(TextBody, 'test%')` won't trigger any pushdown. right? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:None` + + +Is the `email` a keyword or not? please add two tests for +- keyword (e.g. firstname) +- text (e.g. address) + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java:None` + + +[Minor] Change to use `Assume` to skip the test if `isPushdownEnabled` is false. + +``` +Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); +``` + + + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1168` + + +Text field can also be push down as script, Can we track it as enhancement issue? 
+ +And add a Notes to explain current limitation on Text field support in [LIKE](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/string.rst#like). + +``` +### +PUT {{baseUrl}}/demo-index +Content-Type: application/x-ndjson + +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "properties": { + "region": { "type": "text" }, + "sales": { "type": "double" } + } + } +} + +### +POST {{baseUrl}}/demo-index/_bulk +Content-Type: application/x-ndjson + +{ "index": {} } +{ "region": "us-east", "sales": 100.12 } +{ "index": {} } +{ "region": "us-east", "sales": 200.34 } +{ "index": {} } +{ "region": "us-west", "sales": 300.65 } +{ "index": {} } +{ "region": "us-west", "sales": 400.78 } + +### +POST {{baseUrl}}/demo-index/_search +Content-Type: application/x-ndjson + +{ + "derived": { + "region_runtime": { + "type": "keyword", + "script": { + "source": "emit(params._source['region'])" + } + } + }, + "query": { + "term": { + "region_runtime": { + "value": "us-east" + } + } + } +} +``` + + + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java:47` + + +Is it for solving existing bug? + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java:67` + + +Yes + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:None` + + +Done + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java:None` + + +Done + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1168` + + +Added limitation to LIKE function doc. + +I tried pushdown like function script for text field. However, getting ScriptDocValues for text field throws exception to not recommend to do it. 
+ + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java:47` + + +Yes, DSL wildcard matching for text field has different behavior with SQL like function because of tokenization. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1168` + + +Yes, text data does not have doc values, we should use source, e.g. `params._source['region']`. Let's track it in issue, not high priority. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1168` + + +Makes sense. Created a tracking issue: https://github.com/opensearch-project/sql/issues/3950. Hopefully, we can find a way to read script values from source. + + +## General Comments + + +### @songkant-aws + + +@penghuo Need to confirm some behavior. +SQL like function uses '_' to match single character and '%' to match zero or more characters. +DSL wildcard query uses '?' to match single character and '*' to match zero or more characters. + +When like function is pushed down as DSL wildcard query, V2 converts '_' and '%' to '?' and '\*' respectively but it doesn't handle '?' and '\*'. So when user enters "Amber*", he may just expect to match exactly "Amber*" term but DSL will do wildcard match to return "Amber*", "Amber A", "Amber B". + +Question: Since this seems to be incorrect but also a breaking change, should we correct this behavior? Should we totally align with SQL like function? + + +### @LantaoJin + + +> @penghuo Need to confirm some behavior. SQL like function uses '_' to match single character and '%' to match zero or more characters. DSL wildcard query uses '?' to match single character and '*' to match zero or more characters. +> +> When like function is pushed down as DSL wildcard query, V2 converts '_' and '%' to '?' and '*' respectively but it doesn't handle '?' and '*'. 
So when user enters "Amber*", he may just expect to match exactly "Amber*" term but DSL will do wildcard match to return "Amber*", "Amber A", "Amber B". +> +> Question: Since this seems to be incorrect but also a breaking change, should we correct this behavior? Should we totally align with SQL like function? + +I think we should fix this bug. Ref [here](https://docs.splunk.com/Documentation/Splunk/9.4.2/Search/Wildcards) and other databases SQL functions, the `_` and `%` are wildcard characters only for LIKE function. But `*` can be wildcard in other commands/functions, such **match** or **query_string**. + + +--- + +# PR #3914: Support function argument coercion with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3914 + +**Author:** @yuancu + +**Created:** 2025-07-23T07:51:29Z + +**State:** MERGED + +**Merged:** 2025-07-31T10:14:02Z + +**Changes:** +666 -338 (41 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + +Support coercing argument types to expected ones with Calcite. This eliminates the necessities to implement multiple overrides for a function where the arguments can actually be implicitly cast. + +This PR implements the alternative solution in RFC #3761 + +**what's working** +- argument type coercion of functions +- comparator type widening + +### Implementation notes + +Type casting leverages [WideningTypeRule](https://github.com/opensearch-project/sql/blob/91bbb5dd85950a4fda37878b7b612adba7a647fb/core/src/main/java/org/opensearch/sql/data/type/WideningTypeRule.java#L41) from v2 to examine whether it is possible to cast from the argument type to the expected parameter type. + +I use `ExprType` as a common ground for type comparison and coercion. In comparison, the validation phase of Calcite uses `SqlTypeFamily` and `SqlTypeName`. 
However, as we have UDTs that are not covered by types in Calcite, using `ExprType` is more practical in our case. + + +**Test coverage** +- implicit coercion in comparators is covered by + - to date: [testFilterByCompareStringDatePushDownExplain](https://github.com/opensearch-project/sql/blob/b3c01770a0b2cabadce6a3ce4a9134e70de72331/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java#L69) + - to time: [testFilterByCompareStringTimePushDownExplain](https://github.com/opensearch-project/sql/blob/b3c01770a0b2cabadce6a3ce4a9134e70de72331/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java#L80) + - to timestamp: [testFilterByCompareStringTimestampPushDownExplain](https://github.com/opensearch-project/sql/blob/b3c01770a0b2cabadce6a3ce4a9134e70de72331/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java#L58C15-L58C64) + - to IP: [testFilterByCompareIPExplain]() +- implicit coercion of function arguments is covered by: + - [testWeekArgumentCoercion](): cast a incompatible argument type (string) to timestamp. + - besides, function signatures are updated if the argument can be coerced. E.g. `cidrmatch(string, string)` is removed from the signatures of `cidrmatch`, as it the arguments of `(string, string)` is expected to be coerced to `(ip, string)`. Properly working of these functions also verifies the functionality of type coercion. + +### Related Issues +#3761 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +Are all behaviours same with v2? 
If not, please add/update the user doc + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java:30` + + +How about changing the `UDFOperandMetadata STRING` to `UDFOperandMetadata CHARACTER` since CHARACTER is the primary family. Ditto for the rest. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/FromUnixTimeFunction.java:57` + + +IMO, it's more readable to keep the UDFOperandMetadata in the function definition: +``` + @Override + public UDFOperandMetadata getOperandMetadata() { + return UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.NUMERIC.or( + OperandTypes.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING))); + } +``` +``` + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.NUMERIC_OPTIONAL_STRING; + } +``` + +Which one would be better? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java:30` + + +I did not rename it since I wanted to make the distinction between PPL types and their underlying implementation with Calcite -- string is a type in PPL, while character (char & varchar) is its implementation with Calcite. + +It also makes sense to rename `PPLOperandTypes.STRING` to `PPLOperandTypes.CHARACTER` though. This aligns the naming with its implementation. Which do you think is more reasonable? 
+ + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/FromUnixTimeFunction.java:57` + + +The second one complies with Calcite's convention when naming operand types in [OperandTypes.java](https://github.com/apache/calcite/blob/main/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java) + +Besides, another motivation for making this update was to gather all defined operand types in a central repository so that when I have to make updates to them, it's less likely that I carelessly ignored one of them -- this happened this time when I updated the operand types. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/FromUnixTimeFunction.java:57` + + +I don't think setting the operand type in each function class would make your updating this time any harder. And IMO , gathering all operand types in a central class make the function definition unreadable, except they are always reused and duplicated. I prefer to define acceptable operand types in each function even there is a little bit duplication. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/FromUnixTimeFunction.java:57` + + +I agree that having the definition expanded will make what's under the hood more clear. Yet it may not necessarily be a detail that has to be reiterated at each function definition. + +IMO, the latter is more readable. And this is exactly how Calcite defines operand type checkers. E.g. +``` +public static final SqlSingleOperandTypeChecker NUMERIC_OPTIONAL_INTEGER = + NUMERIC.or(NUMERIC_INTEGER); +``` + +The pattern is quite easy to grasp. The added benefit is that, when you want to create a new operand type checker, you can just type the operand types with this pattern -- the corresponding type checker will automatically jump out if they are already defined and used by other functions. 
On the other hand, without this, you will risking redefining what's already out there. What's worse, you may update one and neglect another even if two functions are supposed to have the same operand types. It occurred this time when I update function definitions by removing parameter types that are ought to be coerced. + +With this centralized class, I also don't have to make the following statement even longer: +``` + public static final SqlOperator TIME_TO_SEC = + adaptExprMethodToUDF( + DateTimeFunctions.class, + "exprTimeToSec", + ReturnTypes.BIGINT_FORCE_NULLABLE, + NullPolicy.ARG0, + PPLOperandTypes.TIME_OR_TIMESTAMP_OR_STRING) +``` + +Moreover, previously, we already have some UDFs with their operand type checkers defined in `PPLOperandTypes` if the checker is shared, while those are not shared are defined directly in `getOperandMetadata`. This left a divided representation. It does no harm to unify and collect them into `PPLOperandTypes`. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/udf/datetime/FromUnixTimeFunction.java:57` + + +Okey, I see. The `PPLOperandTypes.xx` is not only used in `getOperandMetadata()` but also used in definition of custom `SqlOperator`. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:69` + + +[non-blocking]I would prefer using Type system defined in Calcite like `RelDataType` or SqlType. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:125` + + +[non-blocking]Calcite has similar operation by using `RelDataTypeFactory::leastRestrictive`. If it's functional equivalent as this, I would prefer use that one. + +We may deprecate ExprValue and ExprType in the future. But for now, it's OK to keep using this one. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:69` + + +I used `ExprType` since it has a full coverage of PPL types. 
I will also have to convert `RelDataType` to either `SqlTypeName` or `ExprType` for comparison purpose. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java:125` + + +Good idea, we could override `leastRestrictive` in favor of `RelDataType` in the future. + + +## General Comments + + +### @yuancu + + +Need #3919 to function properly. + + +### @yuancu + + +Comparing IP is not pushed down as a range query (but as script). I am looking into this and will raise another issue and PR to fix it. + + +### @yuancu + + +Yes, all behaviors remain the same. If a function is declared to accept string, it will not coerce the string to a certain type. This is because it is ought to give certain response (like `null`) for malformatted input. + +E.g. `timestamp(string/timestamp/date/time)` was not modified do `timestamp(timestamp/date/time)`. + + +--- + +# PR #3911: [Backport main] add release notes for 2.19.3 + +**URL:** https://github.com/opensearch-project/sql/pull/3911 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-22T16:49:41Z + +**State:** MERGED + +**Merged:** 2025-07-24T17:54:38Z + +**Changes:** +13 -0 (1 files) + +**Labels:** `maintenance` + + +## Description + +Backport 692e5af9cfc7ac7fbe3c2ac5de9071f9d5f1d6d6 from #3910. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3910: add release notes for 2.19.3 + +**URL:** https://github.com/opensearch-project/sql/pull/3910 + +**Author:** @joshuali925 + +**Created:** 2025-07-22T16:43:13Z + +**State:** MERGED + +**Merged:** 2025-07-22T16:49:27Z + +**Changes:** +13 -0 (1 files) + +**Labels:** `backport main` + + +## Description + +### Description +add release notes for 2.19.3 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3909: ML command supports category_field parameter + +**URL:** https://github.com/opensearch-project/sql/pull/3909 + +**Author:** @gaobinlong + +**Created:** 2025-07-22T07:47:21Z + +**State:** MERGED + +**Merged:** 2026-01-06T08:03:35Z + +**Changes:** +58 -17 (2 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +From the document of [ML command](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/ml.rst), it shows that ml supports `category_field` command, but actually it doesn't work. This PR makes ML command supports category_field parameter. 
+ +Request: +``` +POST _plugins/_ppl?format=jdbc +{ + "query":"source = abcd_test | eval value = cast(value as double) | fields value, category | ml action='trainandpredict' algorithm='rcf' input='value' category_field='category'" +} +``` +Response: +``` +{ + "schema": [ + { + "name": "value", + "type": "double" + }, + { + "name": "category", + "type": "string" + }, + { + "name": "score", + "type": "double" + }, + { + "name": "anomalous", + "type": "boolean" + } + ], + "datarows": [ + [ + 1, + "a", + 0, + false + ], + [ + 2, + "b", + 0, + false + ] + ], + "total": 2, + "size": 2 +} +``` + + +### Related Issues +https://github.com/opensearch-project/sql/issues/3406 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/MLOperator.java:53` + + +`categoryField` is null will throw NPE in `generateCategorizedInputDataset` + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/MLOperator.java:53` + + +How so? 
`generateCategorizedInputDataset` has null checking: https://github.com/opensearch-project/sql/blob/0c1ec27da389be9e434158d67df8dbc993bc45fe/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/MLCommonsOperatorActions.java#L83 + +If we want, we can add a `@Nullable` annotation to that field to document that contract in the signature + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/MLOperator.java:53` + + +Oh, it depends on what kind of Map is used; it seems `HashMap` can handle a null key for `computeIfAbsent(key)`, but `ConcurrentHashMap` and other kinds of Map throw NPE. + + +## General Comments + + +### @gaobinlong + + +@LantaoJin @qianheng-aws @songkant-aws please help to review this PR, thanks! + + +### @songkant-aws + + +LGTM + + +### @songkant-aws + + +@LantaoJin @qianheng-aws @yuancu Need other reviews. + + +### @Swiddis + + +@LantaoJin can you re-review? + + +--- + +# PR #3908: [Backport 2.19-dev]Push down QUERY_SIZE_LIMIT (#3880) + +**URL:** https://github.com/opensearch-project/sql/pull/3908 + +**Author:** @qianheng-aws + +**Created:** 2025-07-22T06:22:54Z + +**State:** MERGED + +**Merged:** 2025-07-22T22:05:19Z + +**Changes:** +25 -11 (7 files) + + +## Description + +* Push down QUERY_SIZE_LIMIT + + + +* Fix compiling + + + +* Use QUERY_SIZE_LIMIT in osIndex settings directly + + + +* Revert code + + + +* SpotlessApply after resolving conflict + + + +--------- + + + +(cherry picked from commit 0bded2aa4d9c80e07929fdc91bac1f74d1e18f7c) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3907: [Backport 2.19-dev] [BugFix] Fix incorrect push down for Sarg with nullAs is TRUE (#3882) + +**URL:** https://github.com/opensearch-project/sql/pull/3907 + +**Author:** @qianheng-aws + +**Created:** 2025-07-22T06:18:41Z + +**State:** MERGED + +**Merged:** 2025-07-22T19:04:06Z + +**Changes:** +122 -36 (4 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3906: [Backport 2.19-dev] Allow warning header for yaml test + +**URL:** https://github.com/opensearch-project/sql/pull/3906 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-22T06:09:45Z + +**State:** MERGED + +**Merged:** 2025-07-22T07:59:18Z + +**Changes:** +37 -1 (11 files) + + +## Description + +Backport 877835e976ba26a852cffb105afe280a8411117f from #3846. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3905: [Backport 2.19-dev] Filter script pushdown with RelJson serialization in Calcite (#3859) + +**URL:** https://github.com/opensearch-project/sql/pull/3905 + +**Author:** @songkant-aws + +**Created:** 2025-07-22T06:05:03Z + +**State:** MERGED + +**Merged:** 2025-07-25T20:33:44Z + +**Changes:** +1737 -124 (49 files) + + +## Description + +### Description +Backport/backport 3859 to 2.19 dev + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +ping @qianheng-aws @penghuo + + +--- + +# PR #3904: [Backport 2.19-dev] Support full expression in WHERE clauses (#3849) + +**URL:** https://github.com/opensearch-project/sql/pull/3904 + +**Author:** @LantaoJin + +**Created:** 2025-07-22T04:06:19Z + +**State:** MERGED + +**Merged:** 2025-07-22T22:06:25Z + +**Changes:** +271 -152 (7 files) + + +## Description + +* Support full expression in WHERE clauses + + + +* add unit tests + + + +* revert typo + + + +* Fix IT + + + +* Fix IT + + + +* Address comment + + + +--------- + + +(cherry picked from commit 988ab2ea13d533f761558c18c0844c4f206a18d6) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +CI failure should be resolved after https://github.com/opensearch-project/sql/pull/3903 merged. + + +--- + +# PR #3903: [Backport 2.19-dev] CVE-2025-48924: upgrade commons-lang3 to 3.18.0 (#3895) + +**URL:** https://github.com/opensearch-project/sql/pull/3903 + +**Author:** @LantaoJin + +**Created:** 2025-07-22T03:17:56Z + +**State:** MERGED + +**Merged:** 2025-07-22T22:06:01Z + +**Changes:** +3 -2 (2 files) + + +## Description + +* CVE-2025-48924: upgrade commons-lang3 to 3.18.0 + + + +* Exclude the dependency commons-lang + + + +--------- + + +(cherry picked from commit 96d0d1434f7736f8e697722ec8ddd95122be7d2a) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +CI failure should be resolved automatically when this PR merged. 
+ + +--- + +# PR #3902: [Backport 2.x] CVE-2025-48924: upgrade commons-lang3 to 3.18.0 (#3895) + +**URL:** https://github.com/opensearch-project/sql/pull/3902 + +**Author:** @LantaoJin + +**Created:** 2025-07-22T03:14:15Z + +**State:** MERGED + +**Merged:** 2025-07-28T08:28:09Z + +**Changes:** +4 -3 (4 files) + + +## Description + +Manually backport https://github.com/opensearch-project/sql/pull/3895 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3901: Fix flaky tests in `RestHandlerClientYamlTestSuiteIT` + +**URL:** https://github.com/opensearch-project/sql/pull/3901 + +**Author:** @LantaoJin + +**Created:** 2025-07-21T06:02:30Z + +**State:** MERGED + +**Merged:** 2025-07-21T19:41:20Z + +**Changes:** +9 -7 (3 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `flaky-test`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Fix 3 flaky tests in `RestHandlerClientYamlTestSuiteIT` + +### Related Issues +Resolves #3900 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +The auto-backport failure was caused by missing relevant backporting of https://github.com/opensearch-project/sql/pull/3882 + + +--- + +# PR #3899: [Backport 2.19-dev] Support partial filter push down (#3850) + +**URL:** https://github.com/opensearch-project/sql/pull/3899 + +**Author:** @qianheng-aws + +**Created:** 2025-07-21T02:44:02Z + +**State:** MERGED + +**Merged:** 2025-07-21T18:42:18Z + +**Changes:** +281 -182 (8 files) + + +## Description + +* Support partial filter push down + + + +* Add doc for PushDownAction + + + +* Fix IT + + + +* Refine code to only keep non-push-down condition in the new filter + + + +* Refine code + + + +* Only show the pushed conditions in the PushDownContext + + + +* Ignore test when push down disabled + + + +* Fix IT after merging main + + + +* Fix IT because mapping changed after merging main + + + +--------- + + +(cherry picked from commit 0b4423e9f3b50670af922a8608116d7182fd728f) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +CI failure related to https://github.com/opensearch-project/sql/pull/3895 + + +--- + +# PR #3897: Add 'testing' and 'security fix' to enforce-label-action + +**URL:** https://github.com/opensearch-project/sql/pull/3897 + +**Author:** @LantaoJin + +**Created:** 2025-07-18T04:41:53Z + +**State:** MERGED + +**Merged:** 2025-07-21T19:41:32Z + +**Changes:** +2 -2 (1 files) + +**Labels:** `infrastructure`, `skip-changelog` + + +## Description + +### Description +Add 'testing' and 'security fix' to enforce-label-action + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `.github/workflows/enforce-labels.yml:13` + + +what is testing means? fix test? + + +### @LantaoJin on `.github/workflows/enforce-labels.yml:13` + + +Test related issue. Such fix tests, fix flaky tests, add new tests, fix test coverage etc. “enhancement” or “bug” are both not the optimal labels. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3895: CVE-2025-48924: upgrade commons-lang3 to 3.18.0 + +**URL:** https://github.com/opensearch-project/sql/pull/3895 + +**Author:** @LantaoJin + +**Created:** 2025-07-18T04:31:57Z + +**State:** MERGED + +**Merged:** 2025-07-21T18:43:52Z + +**Changes:** +3 -2 (2 files) + +**Labels:** `enhancement`, `security fix`, `backport 2.x`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Fix [CVE-2025-48924](https://github.com/advisories/GHSA-j288-q9x7-2f5v) + +https://github.com/opensearch-project/opensearch-build/issues/5637 + +- upgrade commons-lang3 to 3.18.0 +- remove commons-lang (introduced by net.hydromatic:aggdesigner-algorithm:6.0 which is not used in our code. aggdesigner is used when the schema contains Lattice entities) + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @gaiksaya + + +Hi everyone, +Can we please backport to 2.19 branch? 
Looks like it is showing up there too + + +### @gaiksaya + + +Tried https://github.com/opensearch-project/sql/pull/4693 please see if it makes sense + + + +--- + +# PR #3894: [Backport 2.19-dev] [BugFix] Fix the count() only aggregation pushdown issue + +**URL:** https://github.com/opensearch-project/sql/pull/3894 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-18T03:08:41Z + +**State:** MERGED + +**Merged:** 2025-07-18T05:03:53Z + +**Changes:** +72 -0 (7 files) + + +## Description + +Backport b5d31b405310031602615160ab023b34642d6d5f from #3891. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Security Check failure is not related. Ref https://github.com/opensearch-project/sql/pull/3895 + + +--- + +# PR #3892: [Backport 2.19] Update the maven snapshot publish endpoint and credential + +**URL:** https://github.com/opensearch-project/sql/pull/3892 + +**Author:** @RyanL1997 + +**Created:** 2025-07-17T19:07:45Z + +**State:** MERGED + +**Merged:** 2025-07-23T16:10:13Z + +**Changes:** +11 -10 (4 files) + +**Labels:** `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Update the maven snapshot publish endpoint and credential + +### Related Issues +* Backport #3886 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +seems like the security plugin didnt have snapshot for `2.19.3` yet: https://central.sonatype.com/artifact/org.opensearch.plugin/opensearch-security/2.19.2.0/dependents + + +### @LantaoJin + + +why do we backport it to 2.19 rather than 2.x? + + +### @RyanL1997 + + +> why do we backport it to 2.19 rather than 2.x? + +Since we have already moved to 3.x era for major version release, I think the 2.19 should be the last minor version we support for future 2.x patch releases. Similar thing was also applied to 1.3 branch when we first released 2.0 - when we do the backport of 1.x changes we directly backport it to 1.3. cc @LantaoJin + + +### @LantaoJin + + +> > why do we backport it to 2.19 rather than 2.x? +> +> Since we have already moved to 3.x era for major version release, I think the 2.19 should be the last minor version we support for future 2.x patch releases. Similar thing was also applied to 1.3 branch when we first released 2.0 - when we do the backport of 1.x changes we directly backport it to 1.3. cc @LantaoJin + +Oh, sounds we had many 2.x backportings which should be backported to 2.19 instead. For example, https://github.com/opensearch-project/sql/compare/2.19...2.x + + +### @RyanL1997 + + +> Oh, sounds we had many 2.x backportings which should be backported to 2.19 instead. For example, https://github.com/opensearch-project/sql/compare/2.19...2.x + +@LantaoJin If there is a fix for security vulnerability/maintenance (like this PR) we can backport it to 2.19 directly. New features should not be backport to 2.19 anymore. + + +### @LantaoJin + + +@RyanL1997 please manually backport to 2.19-dev if you think it was still required. 
The 2.19-dev branch is only for AOS internal backporting + + +--- + +# PR #3891: [BugFix] Fix the count() only aggregation pushdown issue + +**URL:** https://github.com/opensearch-project/sql/pull/3891 + +**Author:** @LantaoJin + +**Created:** 2025-07-17T10:00:58Z + +**State:** MERGED + +**Merged:** 2025-07-18T03:08:25Z + +**Changes:** +72 -0 (7 files) + +**Labels:** `bug`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Fix the count() only aggregation pushdown issue + +### Related Issues +Resolves #3890 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3889: [Backport 2.19-dev] Support pushdown physical sort operator to speedup SortMergeJoin (#3864) + +**URL:** https://github.com/opensearch-project/sql/pull/3889 + +**Author:** @LantaoJin + +**Created:** 2025-07-17T07:09:08Z + +**State:** MERGED + +**Merged:** 2025-07-21T18:41:48Z + +**Changes:** +421 -323 (26 files) + + +## Description + +Backport 71aa9ba0490305b81284eb369f212a8fc6e3936f from #3864. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +CI failure related to #3895 + + +--- + +# PR #3887: [Backport 2.19] Backport 3797 to 2.x + +**URL:** https://github.com/opensearch-project/sql/pull/3887 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-16T23:19:07Z + +**State:** MERGED + +**Merged:** 2025-07-16T23:22:23Z + +**Changes:** +0 -1 (1 files) + + +## Description + +Backport 433bb9ed1aa802e3e73525dadb99558a9e11c6d0 from #3800. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3886: Update the maven snapshot publish endpoint and credential + +**URL:** https://github.com/opensearch-project/sql/pull/3886 + +**Author:** @RyanL1997 + +**Created:** 2025-07-15T21:30:14Z + +**State:** MERGED + +**Merged:** 2025-07-16T20:07:37Z + +**Changes:** +32 -32 (7 files) + +**Labels:** `infrastructure`, `ci`, `maintenance`, `backport-failed`, `backport 2.19`, `backport 2.19-dev` + + +## Description + +### Description +Update the maven snapshot publish endpoint and credential + +Previous description from #3806: +> Update the Maven Snapshots publish URL in accordance with the recent Sonatype migration. +[central.sonatype.org/publish/publish-portal-snapshots](https://central.sonatype.org/publish/publish-portal-snapshots/) +We have stored the onepassword token in this repo's secrets and new credentials for Sonatype's username & password have been stored in onepassword. These credentials will be exported as env variables which are used by maven publish.
+ +### Related Issues +* Relate https://github.com/opensearch-project/opensearch-build/issues/5551 +* Resolves #3856 +* Resolves the conflict of #3806 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +lgtm + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @RyanL1997 + + +> * What went wrong: +A problem occurred evaluating project ':integ-test'. +> de.undercouch.gradle.tasks.download.org.apache.hc.client5.http.ClientProtocolException: Not Found (HTTP status code: 404, URL: https://central.sonatype.com/service/rest/repository/browse/maven-snapshots/org/opensearch/plugin/opensearch-security/3.1.0.0-SNAPSHOT/maven-metadata.xml) + +Hi @gaiksaya , could you help me to check if im doing something incorrect? cuz i saw that we are getting 404 for the above url that I got from the previous PR #3806. Thanks! + + +### @gaiksaya + + +Hi @RyanL1997 + +I dont think the snapshots are browser-able. I am seeing that they are being published actively for other repos. +Ref: https://github.com/opensearch-project/security/actions/workflows/maven-publish.yml + +Eventually we might move to some other platform to store them as they were intended to be used as a testing mechanism before publishing to maven central instead of using them as dependencies. 
+ + +### @RyanL1997 + + +will do the manual backport + + +--- + +# PR #3882: [BugFix] Fix incorrect push down for Sarg with nullAs is TRUE + +**URL:** https://github.com/opensearch-project/sql/pull/3882 + +**Author:** @qianheng-aws + +**Created:** 2025-07-15T09:53:32Z + +**State:** MERGED + +**Merged:** 2025-07-16T23:53:41Z + +**Changes:** +122 -36 (4 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Fix incorrect push down for Sarg with nullAs is TRUE + +Before this PR, `PredicateAnalyzer` ignored the nullAs property in Sarg, which led to incorrect results. +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3881 +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@qianheng-aws please manually backport it to 2.19-dev. + + +--- + +# PR #3880: Push down QUERY_SIZE_LIMIT + +**URL:** https://github.com/opensearch-project/sql/pull/3880 + +**Author:** @qianheng-aws + +**Created:** 2025-07-15T06:55:04Z + +**State:** MERGED + +**Merged:** 2025-07-17T18:06:15Z + +**Changes:** +25 -11 (7 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Try to push down QUERY_SIZE_LIMIT if it's a simple plan with only scan. + +This PR implements this by only pushing down QUERY_SIZE_LIMIT into the `OpenSearchRequestBuilder` of the final plan if it only contains a single scan operator.
+ +This PR also fixes a bug in LIMIT push down by correct the cost computing logic. This bug will prevent multiple limit push down in some cases, see `CalciteExplainIT::testMultipleLimitExplain`. + + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3879 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - DISMISSED + + +Note, the current implementation is option 1. I am also open to this option so I will approve it. @penghuo please merge it if the current implementation works for you too. + + +## Review Comments + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +Could we append Limit operator on each query instead of OpenSearchDriver constructor. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +That's a workable options as well, I put it in the alternative solution. https://github.com/opensearch-project/sql/issues/3879#issue-3231030764. + + It has side effect of changing the original plan if the append limit operator can not be push down. + +OPTION1: Push down QUERY_SIZE_LIMIT to the final single scan +- PROS: Won't change the plan, and the optimization process is efficient and straight forward +- CONS: Only improve on the restricted case of the single scan + +OPTION2: Append LIMIT operator on the original plan +- PROS: Has improvement on more cases than single scan. e.g `Project-Scan` since it has `SortProjectTransposeRule` to swap `Limit` before the `Project`. +- CONS: Will change the final plan if Limit operator cannot be push down + + Which option do we prefer? 
@penghuo @LantaoJin + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +If we implement eval push down in the future, OPTION2 won't have its PROS since we can support pushing down all kinds of Project and left only a single Scan + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +Option 1 is more like a specific optimization for the setting query_size. I prefer to option 2 with a new LogicalSort such `LogicalQueryLimit`. Anyway, I am ok to both Option 1 and 2. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + ++1 on option2, + +> CONS: Will change the final plan if Limit operator cannot be push down + +Could you elaborate on this? Are you suggesting that adding a LIMIT clause would change the result of the EXPLAIN plan? If that’s the concern, I’m okay with it. We explicitly enforce a querySizeLimit for every query anyway. + + +Latest impl is better, scanWithLimit is clean to me. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:None` + + +> Are you suggesting that adding a LIMIT clause would change the result of the EXPLAIN plan? + +Yes. + +Changing the plan will have suspicion of changing customers' intension although we do have restriction on the final results. + +And I was wondering why Hive or Spark doesn't do similar optimization of appending limit operator, it's said (by LLM) that the current implementation of counting row count on the final iteration has other advantages including: +1. Plan reuse for cases like pagination. Although we don't have such feature for PPL, but maybe needed in the future. +2. Better memory management. It currently doesn't applies for us since the whole process happens in one coordinator. But it will make sense once we change to distribution execution. +3. 
Keep the plan semantically equivalent to user's SQL. + +So for long term consideration, keeping the plan unchanged makes more sense and is a standard practice. + + +## General Comments + + +### @LantaoJin + + +@qianheng-aws please backport to 2.19-dev. + + + +--- + +# PR #3878: Support ```bin``` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3878 + +**Author:** @ahkcs + +**Created:** 2025-07-14T22:58:51Z + +**State:** MERGED + +**Merged:** 2025-08-25T17:09:16Z + +**Changes:** +5641 -6 (71 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +## Summary + +This PR implements a complete separation of the `bin` command's `span` functionality from aggregation `span` logic, ensuring backward compatibility while introducing robust SPL-compatible binning behavior. This includes support for parameters such as `span`, `bins`, `minspan`, `start`, `end`, and `aligntime`. + +--- +## Related Issues + +Resolves #3876 + +## Test Results + +Tests were run on the following real datasets: + +* **`accounts.json`**: 1000 records, `age` values 20–40, `balance` values \~1000–50000 +* **`time_test_data.json`**: 100 records, timestamps from 2025-07-28 to 2025-08-01 + +--- + +### 1. `span` Parameter (Numeric) + +**Query**: + +``` +source=accounts | bin age span=10 | stats count() by age | head 5 +``` + +**Result**: + +```json +[ + [504, "30-40"], + [451, "20-30"], + [45, "40-50"] +] +``` + + + + +--- + +### 2. `bins` Parameter + +**Query**: + +``` +source=accounts | bin age bins=3 | stats count() by age | head 5 +``` + +**Result**: + +```json +[ + [504, "30-40"], + [451, "20-30"], + [45, "40-50"] +] +``` + + +--- + +### 3. 
`minspan` Parameter + +**Query**: + +``` +source=accounts | bin balance minspan=5000 | stats count() by balance | head 5 +``` + +**Result**: + +```json +[ + [187, "30000-40000"], + [215, "40000-50000"], + [213, "10000-20000"], + [168, "0-10000"], + [217, "20000-30000"] +] +``` + + +--- + +### 4. `start` and `end` Parameters + +**Query**: + +``` +source=accounts | bin balance span=10000 start=5000 end=45000 | stats count() by balance | head 5 +``` + +**Result**: + +```json +[ + [187, "30000-40000"], + [215, "40000-50000"], + [213, "10000-20000"], + [168, "0-10000"], + [217, "20000-30000"] +] +``` + +**Logical Plan**: + + +--- + +### 5. `aligntime="@d"` Parameter + +**Query**: + +``` +source=time_test | bin @timestamp span=1hour aligntime=\"@d+3h\" | fields @timestamp | head 5 +``` + +**Result**: + +```json + ["2025-07-28 00:00:00"], + ["2025-07-28 01:00:00"], + ["2025-07-28 02:00:00"], + ["2025-07-28 03:00:00"], + ["2025-07-28 04:00:00"] +``` + + + + +--- + + + +Bin Performance Test + + +``` +=== BIN COMMAND PERFORMANCE TEST (SIMPLIFIED) === +Testing pure bin operations on big5 index + +=== NUMERIC BIN OPERATIONS (metrics.size field) === +🧪 Testing: Bin metrics.size with span=500 +📝 Query: source=big5 | bin `metrics.size` span=500 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 16.05ms + ✅ Iteration 2: 15.63ms + ✅ Iteration 3: 15.68ms + ✅ Iteration 4: 15.55ms + ✅ Iteration 5: 15.73ms +📊 Average time: 15.72ms (5/5 successful) + +🧪 Testing: Bin metrics.size with span=1000 +📝 Query: source=big5 | bin `metrics.size` span=1000 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 16.09ms + ✅ Iteration 2: 16.42ms + ✅ Iteration 3: 16.18ms + ✅ Iteration 4: 16.02ms + ✅ Iteration 5: 16.06ms +📊 Average time: 16.15ms (5/5 successful) + +🧪 Testing: Bin metrics.size with bins=5 +📝 Query: source=big5 | bin `metrics.size` bins=5 | head 10 +🔁 Running 5 iterations... 
+ ✅ Iteration 1: 15.93ms + ✅ Iteration 2: 15.99ms + ✅ Iteration 3: 15.74ms + ✅ Iteration 4: 15.73ms + ✅ Iteration 5: 15.95ms +📊 Average time: 15.86ms (5/5 successful) + +🧪 Testing: Bin metrics.size with minspan=100 +📝 Query: source=big5 | bin `metrics.size` minspan=100 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 16.59ms + ✅ Iteration 2: 16.24ms + ✅ Iteration 3: 16.40ms + ✅ Iteration 4: 16.02ms + ✅ Iteration 5: 16.35ms +📊 Average time: 16.32ms (5/5 successful) + +🧪 Testing: Basic bin metrics.size span=200 +📝 Query: source=big5 | bin `metrics.size` span=200 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.52ms + ✅ Iteration 2: 15.70ms + ✅ Iteration 3: 15.50ms + ✅ Iteration 4: 15.36ms + ✅ Iteration 5: 15.28ms +📊 Average time: 15.47ms (5/5 successful) + +🧪 Testing: Bin metrics.size with span=5000 +📝 Query: source=big5 | bin `metrics.size` span=5000 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.57ms + ✅ Iteration 2: 15.37ms + ✅ Iteration 3: 15.25ms + ✅ Iteration 4: 15.37ms + ✅ Iteration 5: 15.23ms +📊 Average time: 15.35ms (5/5 successful) + +🧪 Testing: Bin metrics.size field only +📝 Query: source=big5 | bin `metrics.size` span=2000 | fields `metrics.size` | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 22.18ms + ✅ Iteration 2: 21.70ms + ✅ Iteration 3: 21.37ms + ✅ Iteration 4: 21.50ms + ✅ Iteration 5: 21.46ms +📊 Average time: 21.64ms (5/5 successful) + +🧪 Testing: Bin metrics.size with bins=5 start=0 end=10000 +📝 Query: source=big5 | bin `metrics.size` bins=5 start=0 end=10000 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.66ms + ✅ Iteration 2: 15.86ms + ✅ Iteration 3: 15.73ms + ✅ Iteration 4: 15.83ms + ✅ Iteration 5: 15.75ms +📊 Average time: 15.76ms (5/5 successful) + +🧪 Testing: Bin metrics.size with bins=10 start=0 end=20000 +📝 Query: source=big5 | bin `metrics.size` bins=10 start=0 end=20000 | head 10 +🔁 Running 5 iterations... 
+ ✅ Iteration 1: 15.85ms + ✅ Iteration 2: 15.72ms + ✅ Iteration 3: 15.57ms + ✅ Iteration 4: 15.66ms + ✅ Iteration 5: 15.95ms +📊 Average time: 15.75ms (5/5 successful) + +=== TIMESTAMP BIN OPERATIONS === +🧪 Testing: Bin @timestamp with span=1h +📝 Query: source=big5 | bin `@timestamp` span=1h | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 23.84ms + ✅ Iteration 2: 23.19ms + ✅ Iteration 3: 23.99ms + ✅ Iteration 4: 23.74ms + ✅ Iteration 5: 23.97ms +📊 Average time: 23.74ms (5/5 successful) + +🧪 Testing: Bin @timestamp with span=4h +📝 Query: source=big5 | bin `@timestamp` span=4h | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 24.18ms + ✅ Iteration 2: 23.99ms + ✅ Iteration 3: 23.66ms + ✅ Iteration 4: 24.13ms + ✅ Iteration 5: 24.03ms +📊 Average time: 23.99ms (5/5 successful) + +🧪 Testing: Bin @timestamp span=1d +📝 Query: source=big5 | bin `@timestamp` span=1d | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 24.99ms + ✅ Iteration 2: 25.33ms + ✅ Iteration 3: 24.90ms + ✅ Iteration 4: 26.67ms + ✅ Iteration 5: 25.53ms +📊 Average time: 25.48ms (5/5 successful) + +🧪 Testing: Bin @timestamp with span=4mon +📝 Query: source=big5 | bin `@timestamp` span=4mon as cate | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 26.23ms + ✅ Iteration 2: 25.94ms + ✅ Iteration 3: 26.20ms + ✅ Iteration 4: 25.89ms + ✅ Iteration 5: 25.63ms +📊 Average time: 25.97ms (5/5 successful) + +🧪 Testing: Bin metrics.tmin with span=50 +📝 Query: source=big5 | bin `metrics.tmin` span=50 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 14.95ms + ✅ Iteration 2: 15.03ms + ✅ Iteration 3: 15.17ms + ✅ Iteration 4: 15.21ms + ✅ Iteration 5: 15.14ms +📊 Average time: 15.10ms (5/5 successful) + +🧪 Testing: Bin metrics.size with bins=2 +📝 Query: source=big5 | bin `metrics.size` bins=2 | head 10 +🔁 Running 5 iterations... 
+ ✅ Iteration 1: 15.47ms + ✅ Iteration 2: 15.17ms + ✅ Iteration 3: 15.08ms + ✅ Iteration 4: 15.06ms + ✅ Iteration 5: 15.29ms +📊 Average time: 15.21ms (5/5 successful) + +🧪 Testing: Bin metrics.size with bins=21 +📝 Query: source=big5 | bin `metrics.size` bins=21 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.26ms + ✅ Iteration 2: 14.91ms + ✅ Iteration 3: 14.85ms + ✅ Iteration 4: 14.88ms + ✅ Iteration 5: 14.97ms +📊 Average time: 14.97ms (5/5 successful) + +🧪 Testing: Bin metrics.size with bins=49 +📝 Query: source=big5 | bin `metrics.size` bins=49 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.19ms + ✅ Iteration 2: 14.98ms + ✅ Iteration 3: 15.04ms + ✅ Iteration 4: 15.19ms + ✅ Iteration 5: 15.19ms +📊 Average time: 15.11ms (5/5 successful) + +🧪 Testing: Bin metrics.tmin with minspan=1001 +📝 Query: source=big5 | bin `metrics.tmin` minspan=1001 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 14.99ms + ✅ Iteration 2: 15.23ms + ✅ Iteration 3: 14.85ms + ✅ Iteration 4: 14.98ms + ✅ Iteration 5: 15.20ms +📊 Average time: 15.05ms (5/5 successful) + +🧪 Testing: Bin metrics.tmin start=0 end=1001 +📝 Query: source=big5 | bin `metrics.tmin` start=0 end=1001 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 16.04ms + ✅ Iteration 2: 15.67ms + ✅ Iteration 3: 15.64ms + ✅ Iteration 4: 15.91ms + ✅ Iteration 5: 15.29ms +📊 Average time: 15.71ms (5/5 successful) + +=== LOGARITHMIC BIN OPERATIONS === +🧪 Testing: Bin metrics.size with span=log10 +📝 Query: source=big5 | bin `metrics.size` span=log10 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.32ms + ✅ Iteration 2: 15.45ms + ✅ Iteration 3: 15.53ms + ✅ Iteration 4: 15.61ms + ✅ Iteration 5: 15.67ms +📊 Average time: 15.51ms (5/5 successful) + +🧪 Testing: Bin metrics.size with span=2log10 +📝 Query: source=big5 | bin `metrics.size` span=2log10 | head 10 +🔁 Running 5 iterations... 
+ ✅ Iteration 1: 15.78ms + ✅ Iteration 2: 15.70ms + ✅ Iteration 3: 15.28ms + ✅ Iteration 4: 15.17ms + ✅ Iteration 5: 14.89ms +📊 Average time: 15.36ms (5/5 successful) + +🧪 Testing: Bin metrics.size with span=log2 +📝 Query: source=big5 | bin `metrics.size` span=log2 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 14.82ms + ✅ Iteration 2: 14.94ms + ✅ Iteration 3: 14.95ms + ✅ Iteration 4: 15.29ms + ✅ Iteration 5: 15.16ms +📊 Average time: 15.03ms (5/5 successful) + +🧪 Testing: Bin metrics.size with span=1.5log10 +📝 Query: source=big5 | bin `metrics.size` span=1.5log10 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.79ms + ✅ Iteration 2: 16.58ms + ✅ Iteration 3: 15.73ms + ✅ Iteration 4: 15.76ms + ✅ Iteration 5: 15.45ms +📊 Average time: 15.86ms (5/5 successful) + +🧪 Testing: Bin metrics.size with span=1.5log3 +📝 Query: source=big5 | bin `metrics.size` span=1.5log3 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.14ms + ✅ Iteration 2: 14.84ms + ✅ Iteration 3: 15.77ms + ✅ Iteration 4: 15.83ms + ✅ Iteration 5: 15.50ms +📊 Average time: 15.41ms (5/5 successful) + +=== TIME UNIT BIN OPERATIONS === +🧪 Testing: Bin @timestamp with span=30seconds +📝 Query: source=big5 | bin `@timestamp` span=30seconds | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 24.36ms + ✅ Iteration 2: 23.53ms + ✅ Iteration 3: 23.82ms + ✅ Iteration 4: 24.15ms + ✅ Iteration 5: 23.46ms +📊 Average time: 23.86ms (5/5 successful) + +🧪 Testing: Bin @timestamp with span=45minute +📝 Query: source=big5 | bin `@timestamp` span=45minute | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 23.36ms + ✅ Iteration 2: 23.18ms + ✅ Iteration 3: 23.69ms + ✅ Iteration 4: 23.13ms + ✅ Iteration 5: 22.86ms +📊 Average time: 23.24ms (5/5 successful) + +🧪 Testing: Bin @timestamp with span=7day +📝 Query: source=big5 | bin `@timestamp` span=7day | head 10 +🔁 Running 5 iterations... 
+ ✅ Iteration 1: 24.87ms + ✅ Iteration 2: 25.26ms + ✅ Iteration 3: 25.58ms + ✅ Iteration 4: 25.08ms + ✅ Iteration 5: 25.05ms +📊 Average time: 25.16ms (5/5 successful) + +🧪 Testing: Bin @timestamp with span=6day +📝 Query: source=big5 | bin `@timestamp` span=6day | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 25.13ms + ✅ Iteration 2: 25.52ms + ✅ Iteration 3: 25.56ms + ✅ Iteration 4: 25.14ms + ✅ Iteration 5: 24.87ms +📊 Average time: 25.24ms (5/5 successful) + +=== ALIGNTIME BIN OPERATIONS === +🧪 Testing: Bin @timestamp span=12h aligntime='@d+3h' +📝 Query: source=big5 | bin `@timestamp` span=12h aligntime=\"@d+3h\" | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 24.94ms + ✅ Iteration 2: 24.10ms + ✅ Iteration 3: 24.37ms + ✅ Iteration 4: 24.85ms + ✅ Iteration 5: 24.13ms +📊 Average time: 24.47ms (5/5 successful) + +🧪 Testing: Bin @timestamp span=12h aligntime='@d-1h' +📝 Query: source=big5 | bin `@timestamp` span=12h aligntime=\"@d-1h\" | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 24.46ms + ✅ Iteration 2: 23.95ms + ✅ Iteration 3: 23.91ms + ✅ Iteration 4: 23.81ms + ✅ Iteration 5: 23.44ms +📊 Average time: 23.91ms (5/5 successful) + +🧪 Testing: Bin @timestamp span=12h aligntime=1500000000 +📝 Query: source=big5 | bin `@timestamp` span=12h aligntime=1500000000 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 22.98ms + ✅ Iteration 2: 23.58ms + ✅ Iteration 3: 23.66ms + ✅ Iteration 4: 23.16ms + ✅ Iteration 5: 22.82ms +📊 Average time: 23.24ms (5/5 successful) + +=== LOAD TESTING WITH DIFFERENT RESULT SIZES === +🧪 Testing: Large result set (100 rows) +📝 Query: source=big5 | bin `@timestamp` span=1h | head 100 +🔁 Running 3 iterations... + ✅ Iteration 1: 29.98ms + ✅ Iteration 2: 30.36ms + ✅ Iteration 3: 30.07ms +📊 Average time: 30.13ms (3/3 successful) + +🧪 Testing: Larger result set (500 rows) +📝 Query: source=big5 | bin `@timestamp` span=1h | head 500 +🔁 Running 3 iterations... 
+ ✅ Iteration 1: 61.30ms + ✅ Iteration 2: 61.22ms + ✅ Iteration 3: 61.36ms +📊 Average time: 61.29ms (3/3 successful) + +🧪 Testing: Very large result set (1000 rows) +📝 Query: source=big5 | bin `@timestamp` span=1h | head 1000 +🔁 Running 2 iterations... + ✅ Iteration 1: 100.67ms + ✅ Iteration 2: 100.38ms +📊 Average time: 100.52ms (2/2 successful) + +=== PERFORMANCE COMPARISON === +🧪 Testing: Small numeric span (10) +📝 Query: source=big5 | bin `metrics.size` span=10 | head 50 +🔁 Running 3 iterations... + ✅ Iteration 1: 17.92ms + ✅ Iteration 2: 18.03ms + ✅ Iteration 3: 17.70ms +📊 Average time: 17.88ms (3/3 successful) + +🧪 Testing: Medium numeric span (100) +📝 Query: source=big5 | bin `metrics.size` span=100 | head 50 +🔁 Running 3 iterations... + ✅ Iteration 1: 17.70ms + ✅ Iteration 2: 17.42ms + ✅ Iteration 3: 17.44ms +📊 Average time: 17.52ms (3/3 successful) + +🧪 Testing: Large numeric span (1000) +📝 Query: source=big5 | bin `metrics.size` span=1000 | head 50 +🔁 Running 3 iterations... + ✅ Iteration 1: 17.35ms + ✅ Iteration 2: 17.35ms + ✅ Iteration 3: 17.70ms +📊 Average time: 17.46ms (3/3 successful) + +=== BASELINE COMPARISON === +🧪 Testing: Baseline (no bin) +📝 Query: source=big5 | head 100 +🔁 Running 5 iterations... + ✅ Iteration 1: 21.36ms + ✅ Iteration 2: 21.43ms + ✅ Iteration 3: 21.73ms + ✅ Iteration 4: 21.64ms + ✅ Iteration 5: 21.57ms +📊 Average time: 21.54ms (5/5 successful) + +🧪 Testing: Baseline with fields +📝 Query: source=big5 | fields `@timestamp`, `metrics.size` | head 100 +🔁 Running 5 iterations... + ✅ Iteration 1: 16.26ms + ✅ Iteration 2: 16.03ms + ✅ Iteration 3: 16.16ms + ✅ Iteration 4: 16.06ms + ✅ Iteration 5: 15.58ms +📊 Average time: 16.01ms (5/5 successful) + +=== CONCURRENT LOAD TEST === +🚀 Running concurrent load test (3 parallel requests)... +Temp directory: /tmp/tmp.w737guGOZ0 +Starting concurrent request 1 (timestamp bin)... +Starting concurrent request 2 (numeric bin)... +Starting concurrent request 3 (bins parameter)... 
+Concurrent test results: +Request 1: +📊 Average time: 30.66ms (1/1 successful) + +Request 2: +📊 Average time: 21.38ms (1/1 successful) + +Request 3: +📊 Average time: 18.80ms (1/1 successful) + + +=== SIMPLE BIN VARIATIONS === +🧪 Testing: Bin with alias +📝 Query: source=big5 | bin `metrics.size` span=100 as size_bucket | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 22.80ms + ✅ Iteration 2: 22.48ms + ✅ Iteration 3: 22.42ms + ✅ Iteration 4: 22.22ms + ✅ Iteration 5: 22.45ms +📊 Average time: 22.47ms (5/5 successful) + +🧪 Testing: Multiple field bin (sequential) +📝 Query: source=big5 | bin `metrics.size` span=100 | bin `metrics.tmin` span=50 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 14.90ms + ✅ Iteration 2: 15.20ms + ✅ Iteration 3: 15.24ms + ✅ Iteration 4: 15.03ms + ✅ Iteration 5: 15.35ms +📊 Average time: 15.14ms (5/5 successful) + +🧪 Testing: Bin with very small span +📝 Query: source=big5 | bin `metrics.size` span=1 | head 10 +🔁 Running 5 iterations... + ✅ Iteration 1: 15.20ms + ✅ Iteration 2: 14.68ms + ✅ Iteration 3: 14.91ms + ✅ Iteration 4: 14.89ms + ✅ Iteration 5: 14.66ms +📊 Average time: 14.86ms (5/5 successful) + +🧪 Testing: Bin with very large span +📝 Query: source=big5 | bin `metrics.size` span=100000 | head 10 +🔁 Running 5 iterations... 
+ ✅ Iteration 1: 15.09ms + ✅ Iteration 2: 14.81ms + ✅ Iteration 3: 15.05ms + ✅ Iteration 4: 14.78ms + ✅ Iteration 5: 14.66ms +📊 Average time: 14.87ms (5/5 successful) + + +=== SUMMARY === +✅ Simplified bin command performance testing complete +💡 Key metrics to analyze: + - Pure bin operation performance without aggregation overhead + - Performance difference between numeric and timestamp binning + - Impact of different span sizes and bin parameters + - Overhead of bin operations vs baseline queries + - Concurrent request handling capability + +🎯 Performance optimization targets: + - Focus on bin algorithm efficiency + - Memory usage for binning operations + - CPU usage during logarithmic span calculations + - Timestamp parsing and binning performance + +============================================ +🏁 SIMPLIFIED BIN PERFORMANCE TEST COMPLETE +============================================ + +``` + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalciteBinCommandBig5IT.java:None` + + +Please refrain from leaving commented-out / no-longer-used code in git. + +Maybe you could replace the dataset with one existing in the sql repository already if you still want to include this test. + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalciteBinCommandBig5IT.java:None` + + +Removed + + +### @yuancu on `docs/user/ppl/cmd/bin.rst:None` + + +This seems no longer valid? + +The latest implementation (15aa916d3f16730808200943a4ab44108d259f94) categorize all age into `0-10000` + + +### @ahkcs on `docs/user/ppl/cmd/bin.rst:None` + + +updated + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/BinUtils.java:None` + + +Is floating point span supported? 
+ +From the doc: +> Test nice widths: Iterate through powers of 10 from smallest to largest: [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000] + +It seems floating-point ones are supported. Yet this line will cast them to integer value spans, which will cause divide-by-zero exception for the following PPL: + +``` +source=opensearch-sql_test_index_account | bin age span=0.9 +``` + + +### @yuancu on `integ-test/src/test/resources/time_test_data.json:1` + + +It seems this test resource is not used in any test? + +In order to use it, I think you should first create a corresponding index mapping, then load the index as how you load other test date in tests. + + +### @yuancu on `docs/user/ppl/cmd/bin.rst:None` + + +It should throw an error when unexpected combinations of parameters coexist to prevent undefined behaviors. The current implementation doesn't. + + +### @yuancu on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Should users follow a prescribed order when using these parameters? E.g. start cannot come before bins. If so, it should be clarified in the document / throw an explanation error. Otherwise, the grammar should be refined. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/BinUtils.java:None` + + +I'm a little confused about the design decision: why is there such constraint on the field name (`@timestamp` only)? + +Is it because binning on timestamp is only applicable to `@timestamp` in SPL? + +IMO, I think it should work as long as the field type is supported (numeric, timestamp, date, etc.) + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/BinSpanFunction.java:None` + + +I am a little confused about the meaning of this division factor. + +From the context, it seems that it means the time unit in milliseconds: +``` +MICROSECONDS -> MICROS_PER_MILLI +MILLISECONDS -> 1 +CENTISECONDS -> MILLIS_PER_CENTISECOND +... 
+``` + +But why does units above seconds has the division factor `1`? + +This makes the function `convertToTargetUnit` hard to interpret: + +```java + /** Converts timestamp to the target time unit. */ + private static RexNode convertToTargetUnit( + RexNode fieldExpr, TimeUnitConfig config, CalcitePlanContext context) { + RexNode epochMillis = + context.rexBuilder.makeCall(PPLBuiltinOperators.UNIX_TIMESTAMP, fieldExpr); + + if (config.divisionFactor == 1) { + return epochMillis; + } else if (config.divisionFactor > 1) { + // For sub-second units or conversion to larger units + return context.relBuilder.call( + SqlStdOperatorTable.DIVIDE, + epochMillis, + context.relBuilder.literal(config.divisionFactor)); + } else { + // For microseconds (multiply by 1000) + return context.relBuilder.call( + SqlStdOperatorTable.MULTIPLY, epochMillis, context.relBuilder.literal(MICROS_PER_MILLI)); + } + } + +``` + +For those above seconds, it does not convert the timestamp in mills to the target unit. This invalidates the following binning operation: +```java + // Perform binning calculation: FLOOR(adjusted_value / interval) * interval + RexNode binValue = performBinning(adjustedValue, intervalValue, context); +``` + +As a result, when the span is in hour or minute, binning does not work. E.g. + +``` +source=time_test | bin @timestamp span=1h as cate | fields cate, @timestamp +``` + +cate | @timestamp +-- | -- +2025-07-28 00:15:23 | 2025-07-28 00:15:23 +2025-07-28 01:42:15 | 2025-07-28 01:42:15 +2025-07-28 02:28:45 | 2025-07-28 02:28:45 +2025-07-28 03:56:20 | 2025-07-28 03:56:20 +2025-07-28 04:33:10 | 2025-07-28 04:33:10 + +The timestamp is not binned. + +~~But mysteriously, when the unit is above day, they are binned again.~~. 
<-- It's because it's using another function `createDaysSpan`, instead of `createStandardTimeSpan` + + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/BinSpanFunction.java:None` + + +Binning into month does not work: + +``` +source=time_test | bin @timestamp span=1mon as cate | fields cate, @timestamp | head 5 +``` + +```json +{ + "schema": [ + { + "name": "cate", + "type": "timestamp" + }, + { + "name": "@timestamp", + "type": "timestamp" + } + ], + "datarows": [ + [ + "2025-07-28 00:15:23", + "2025-07-28 00:15:23" + ], + [ + "2025-07-28 01:42:15", + "2025-07-28 01:42:15" + ], + [ + "2025-07-28 02:28:45", + "2025-07-28 02:28:45" + ], + [ + "2025-07-28 03:56:20", + "2025-07-28 03:56:20" + ], + [ + "2025-07-28 04:33:10", + "2025-07-28 04:33:10" + ] + ], + "total": 5, + "size": 5 +} +``` + +Please check your implementation. After fixing it, please add corresponding IT. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/ppl/BinCommandIT.java:None` + + +All ITs need to verify result correctness with `verifyDataRows`. It helps reviewers and guardrails future edits. + + +### @yuancu on `docs/user/ppl/cmd/bin.rst:None` + + +After a little research, I did not find any **named** algorithms like *SPL's "nice number" algorithm*. + +I understand that the behavior is expected to be aligned with SPL. But I don't think SPL invented them or hold a patent (which would be worse due to legal issues). It might be better to refrain from mentioning SPL in this document. + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/BinUtils.java:None` + + +floating point span is not supported, you can only use log-span `[]log[]` or span-length `[]` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/BinUtils.java:None` + + +I see. In that case, maybe `0.001, 0.01, 0.1` should be removed from nice numbers for consistency? + +And you should probably throw an error when user try to use a fp span. 
+ + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/BinUtils.java:None` + + +I made a new revision, now the floating point span is supported + + +### @ahkcs on `integ-test/src/test/resources/time_test_data.json:1` + + +Updated + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/BinUtils.java:None` + + +We have changed our design decision. Now there's no constraint on the field name + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/BinSpanFunction.java:None` + + +Updated + + +### @ahkcs on `core/src/main/java/org/opensearch/sql/calcite/utils/BinSpanFunction.java:None` + + +Fixed + + +### @ahkcs on `integ-test/src/test/java/org/opensearch/sql/ppl/BinCommandIT.java:None` + + +Updated IT tests + + +### @ahkcs on `docs/user/ppl/cmd/bin.rst:None` + + +Updated bin.rst file + + +### @ahkcs on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +I think there's no strict prescribed order that users should follow + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/expression/Argument.java:None` + + +There is a `@Getter` annotation on the class, so there is no need to add these methods. 
+ + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/expression/Field.java:None` + + +ditto + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/expression/Let.java:None` + + +ditto + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/expression/Literal.java:None` + + +ditto + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/expression/Map.java:None` + + +ditto + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java:None` + + +ditto + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java:596` + + +javadoc is required for any new interface + + +## General Comments + + +### @yuancu + + +I haven't read through the implementations yet, but an initial attempt with the `bins` option returns an confusing result: + +I tested it on the [accounts index](https://github.com/opensearch-project/sql/blob/64e856232195163a83b483a080d7ce0b9a59e457/integ-test/src/test/resources/accounts.json), where the max age is 40, and the minimum age is 20. The query `source=opensearch-sql_test_index_account | bin age bins=4` gives the following logical plan: + +``` +LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], age_bin=[+(*(FLOOR(DIVIDE(-($8, 0.0E0:DOUBLE), /(1000.0E0:DOUBLE, 4))), /(1000.0E0:DOUBLE, 4)), 0.0E0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) +``` + +From the plan, the new `age_bin` field is calculated as `FLOOR($8 / (1000.0 / 4)) * (1000.0 / 4)`. I am confused where is the `1000` from. According to my understanding, it should be like: + +``` +max = max(age) <-- 40 +min = min(age) <-- 20 +span = (max - min) / bins <-- 5 +age_bin = floor((age - min) / span) * span + min + = floor((age - 20) / 5) * 5 + 20 +``` + +I guess you did not calculate the max and min, but directly using 1000 and 0? 
+ + + +### @yuancu + + +The PR description is very solid and in detail. It would be better if you could create an issue or link it to an existing issue for this PR. + + +### @yuancu + + +The `start` and `end` options does not work as expected as well. For example, in the result of `source=opensearch-sql_test_index_account | bin age span=5 start=30 end=40`, age 28 is binned to 25; age 23 is binned to 20. + +But the start is 30, there shouldn't be 25 and 20 in the result. The logical plan it gives contains no start and end information either: +``` +LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], age_bin=[*(FLOOR(/($8, 5)), 5)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) +``` + +By the way, for values below the start, should they be converted to the start value or be kept as is? + + +### @yuancu + + +Could you also add tests in `ExplainIT` (or `CalciteExplainIT` if the command is only available since 3.1.0) to validate the generated logical and physical plan? + + +### @ahkcs + + +> > The `start` and `end` options does not work as expected as well. For example, in the result of `source=opensearch-sql_test_index_account | bin age span=5 start=30 end=40`, age 28 is binned to 25; age 23 is binned to 20. +> > But the start is 30, there shouldn't be 25 and 20 in the result. The logical plan it gives contains no start and end information either: +> > ``` +> > LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], age_bin=[*(FLOOR(/($8, 5)), 5)]) +> > CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) +> > ``` +> > +> > +> > +> > +> > +> > +> > +> > +> > +> > +> > +> > By the way, for values below the start, should they be converted to the start value or be kept as is? 
+> + +Thanks for the reply, I have updated our implementation for `start/end` The start and end parameters don't filter data but instead expand the range used for bin width calculation - for example, bin age start=0 end=99 creates 3 bins (20-30, 30-40, 40-50) while bin age start=0 end=102 creates 1 bin (0-100) with all 1000 records, using "nice number" algorithm that chooses different bin widths based on the total range +I post more details in PR description + + + +### @ahkcs + + +> I haven't read through the implementations yet, but an initial attempt with the `bins` option returns an confusing result: +> +> I tested it on the [accounts index](https://github.com/opensearch-project/sql/blob/64e856232195163a83b483a080d7ce0b9a59e457/integ-test/src/test/resources/accounts.json), where the max age is 40, and the minimum age is 20. The query `source=opensearch-sql_test_index_account | bin age bins=4` gives the following logical plan: +> +> ``` +> LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], age_bin=[+(*(FLOOR(DIVIDE(-($8, 0.0E0:DOUBLE), /(1000.0E0:DOUBLE, 4))), /(1000.0E0:DOUBLE, 4)), 0.0E0)]) +> CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) +> ``` +> +> From the plan, the new `age_bin` field is calculated as `FLOOR($8 / (1000.0 / 4)) * (1000.0 / 4)`. I am confused where is the `1000` from. According to my understanding, it should be like: +> +> ``` +> max = max(age) <-- 40 +> min = min(age) <-- 20 +> span = (max - min) / bins <-- 5 +> age_bin = floor((age - min) / span) * span + min +> = floor((age - 20) / 5) * 5 + 20 +> ``` +> +> I guess you did not calculate the max and min, but directly using 1000 and 0? + +I updated the implementation. 
Our current implementation now correctly calculates min(age)=20 and max(age)=40 dynamically, and importantly, now bins=4 doesn't create exactly 4 bins but rather creates at most 4 bins using "nice number" widths - so bins=4 for age 20-40 actually produces 3 bins (20-30, 30-40, 40-50) with width=10 instead of 4 bins with width=5, because now it prioritizes human-readable bin boundaries over exact bin counts. Also, we now modify the original age field in-place instead of creating a separate age_bin field + + +### @yuancu + + +> importantly, now bins=4 doesn't create exactly 4 bins but rather creates at most 4 bins using "nice number" widths - so bins=4 for age 20-40 actually produces 3 bins (20-30, 30-40, 40-50) with width=10 instead of 4 bins with width=5, because now it prioritizes human-readable bin boundaries over exact bin counts. + +Could you please give a precise definition of *nice number* in the documentation? + +`source=opensearch-sql_test_index_account | bin age bins=1000 as age_cate | fields age_cate, age` also results in categories of $$[20, 30), [30, 40), [40, 50)$$. The choice of a minimum interval of 10 seems to lack an explanation. + + +### @yuancu + + +It seems that in the latest implementation, you replace the target field of `bin` with the bin range formatted as `start-end` (covering $$[start, end)$$), instead of adding a new field called `[target field]_bin` and storing `start`. Could you please update the documentation accordingly? + + +### @yuancu + + +I found that the `span` option does not work with `start` and `end`. Is this intentional? + +E.g. for `bin age span=1 start=25 end=35`, the values below 25 or above 35 are not categorized into `Other`. + + +### @yuancu + + +The result of `bins` combined with `start` and `end` also looks counter-intuitive to me: + +E.g. 
A partial result of `source=opensearch-sql_test_index_account | eval age0 = age | bin age bins=1 start=25 end=35 | fields age0, age` is as below + +age0 | age +-- | -- +32 | Outlier +36 | Outlier +28 | 20-30 +33 | Outlier +36 | Outlier +39 | Outlier +34 | Outlier +39 | Outlier + +Why are they categorized into 20 ~ 30 instead of 25 ~ 35? I guess it has something to do with the *nice number*, but did not find a clue from the doc. + +Btw, this `Other` category is sometimes called `Other`, and sometimes `Outlier`. Is there a pattern? + + + +### @yuancu + + +I tried a query with `span=0.5`: `source=opensearch-sql_test_index_account | bin age span=0.5 as age_cate | fields age_cate, age`. It results into an OOM error and crashed the cluster: + +```json +{ + "error": { + "reason": "Invalid Query", + "details": "Java heap space: failed reallocation of scalar replaced objects", + "type": "CalciteUnsupportedException" + }, + "status": 400 +} +``` + +Could you please fix it? + + +### @yuancu + + +If I am going to apply `bin` on a date/time related field, is the support exclusive to timestamp? 
+ +I tried to bin a date field, it ran into a 500 error: + +`source=bank | bin birthdate` + +```json +{ + "error": { + "reason": "There was internal problem at backend", + "details": "java.sql.SQLException: Error while preparing plan [LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[CASE(SEARCH($3, Sarg[[0..10000)]), '0-10000', SEARCH($3, Sarg[[10000..20000)]), '10000-20000', SEARCH($3, Sarg[[20000..30000)]), '20000-30000', SEARCH($3, Sarg[[30000..40000)]), '30000-40000', SEARCH($3, Sarg[[40000..50000)]), '40000-50000', 'Other')], date_value=[$4], gender=[$5], city=[$6], lastname=[$7], balance=[$8], employer=[$9], state=[$10], age=[$11], email=[$12], male=[$13])\n CalciteLogicalIndexScan(table=[[OpenSearch, bank]])\n]", + "type": "RuntimeException" + }, + "status": 500 +} +``` + +The birthdate is of type date: +```json +{ + "mappings": { + "properties": { + "birthdate": { + "type": "date" + } + } +} +``` + + +If the command is exclusive to timestamp, there should be a proper error information stating so. + +**Update**: I did not succeed with *epoch_millis* used in your IT either. Please check the implementation. + + +### @ahkcs + + +Hi @yuancu , I made a new commit that includes a big update on my `bin` command implementation that resolves the above replies, you can check the `bin.rst` for more details + + +### @yuancu + + +The query `source=bank | bin birthdate` results in the following error: + +```json +{ + "error": { + "reason": "Invalid Query", + "details": "Cannot infer return type for /; operand types: [EXPR_TIMESTAMP, DOUBLE]", + "type": "IllegalArgumentException" + }, + "status": 400 +} +``` + +Can you look into it? 
Here is the index mapping of `bank`: [bank_index_mapping.json](https://github.com/opensearch-project/sql/blob/71aa9ba0490305b81284eb369f212a8fc6e3936f/integ-test/src/test/resources/indexDefinitions/bank_index_mapping.json) + + +### @ahkcs + + +> The query `source=bank | bin birthdate` results in the following error: +> +> ```json +> { +> "error": { +> "reason": "Invalid Query", +> "details": "Cannot infer return type for /; operand types: [EXPR_TIMESTAMP, DOUBLE]", +> "type": "IllegalArgumentException" +> }, +> "status": 400 +> } +> ``` +> +> Can you look into it? + +For time-based binning, we will need to use bin @timestamp. And we require the dataset used to have the field @timestamp, otherwise time-based binning can't be used, you can use this dataset `time_test_data.json` I uploaded to test + + +### @anasalkouz + + +Do you have the performance benchmark results? + + +### @LantaoJin + + +missing changes in `PPLQueryDataAnonymizer` and `PPLQueryDataAnonymizerTest` + + +### @ahkcs + + +> missing changes in `PPLQueryDataAnonymizer` and `PPLQueryDataAnonymizerTest` + +Added changes in `PPLQueryDataAnonymizer` and `PPLQueryDataAnonymizerTest` + + +### @ahkcs + + +> Basically LGTM except https://github.com/opensearch-project/sql/pull/3878/files#r2269164173 + +This IT failure no longer exists in the latest local run with the new fixes commits added, I have removed the change + + +### @qianheng-aws + + +@ahkcs Could you please elaborate more on `bins` parameter? + +In my understanding, It seems to find the smallest precision to split our data into several buckets with number less than or equal to k, which is 3 for example in your description. + +My question is: +1. Could this be wrapped in a function instead of compositing plenty of basic SqlOperators, which is very confusing as shown in the plan. 
In my opinion, PPL users, even developers don't need to care about the implementation details from the plan, but only whether right functions are used here and what it returns. +2. Could this process be simplified without iterating on different precisions from 0.001 to 10000? I think there should be an approach to quickly find the proper precision, like using `range/bins`. + + +### @ahkcs + + +> @ahkcs Could you please elaborate more on `bins` parameter? +> +> In my understanding, It seems to find the smallest precision to split our data into several buckets with number less than or equal to k, which is 3 for example in your description. +> +> My question is: +> +> 1. Could this be wrapped in a function instead of compositing plenty of basic SqlOperators, which is very confusing as shown in the plan. In my opinion, PPL users, even developers don't need to care about the implementation details from the plan, but only whether right functions are used here and what it returns. +> 2. Could this process be simplified without iterating on different precisions from 0.001 to 10000? I think there should be an approach to quickly find the proper precision, like using `range/bins`. + +Hi @qianheng-aws, thanks for your reply. For the first question, I've created a `BinWidthCalculatorFunction` that wraps the bins parameter logic and updated the explain output and CalciteExplainIT. + +For the second question, the current algorithm we have is this: Choose the first width where `CEIL(data_range / width)` ≤ `requested_bins`, which I think is essentially the same as `range/bins`. + + +### @qianheng-aws + + +> I've created a BinWidthCalculatorFunction that wraps the bins parameter logic and updated the explain output and CalciteExplainIT. + +Thanks for the change, it's better than before. But the latest plan for bins still ends with a very large `LogicalProject` before aggregation. The further enhancement idea is wrapping the logic of generating bins(e.g. 
10-20) into another function and it accepts both `field` and output of `BinWidthCalculatorFunction` as its parameters. What's better is combining these 2 functions into 1.(But if we plan to implement `BinWidthCalculatorFunction` as a window function, we should split them later, see below) + +Another concern is we have to calculate the width for each row while the results should be the same since they share the same value of min and max. For the consideration of performance, we'd better implement `BinWidthCalculatorFunction` as window function, which is actually the wrapper of `min` + `max` + current logic of this function as a post-process. This could optimize the time complexity from O(n) to O(1). Leaving this enhancement as a TODO is also fine to me. + +> For the second question, the current algorithm we have is this: Choose the first width where CEIL(data_range / width) ≤ requested_bins, which I think is essentially the same as range/bins. + +The current algorithm calculates all bins number before the latest change. Although with the latest change, it still has to iterate `NICE_WIDTHS` until finding the proper one. In the worst case, it has to iterate all WIDTHS. My thought is why not calculating the `NICE_WIDTH` directly. + + + +### @qianheng-aws + + +@ahkcs Could you please check the implementation of other parameters as well? Haven't looked into the implementation details, but, in my opinion, most new plans of this PR look to large and unfriendly to users. + + +### @ahkcs + + +> Thanks for the change, it's better than before. But the latest plan for bins still ends with a very large `LogicalProject` before aggregation. The further enhancement idea is wrapping the logic of generating bins(e.g. 10-20) into another function and it accepts both `field` and output of `BinWidthCalculatorFunction` as its parameters. 
What's better is combining these 2 functions into 1.(But if we plan to implement `BinWidthCalculatorFunction` as a window function, we should split them later, see below) +> +> Another concern is we have to calculate the width for each row while the results should be the same since they share the same value of min and max. For the consideration of performance, we'd better implement `BinWidthCalculatorFunction` as window function, which is actually the wrapper of `min` + `max` + current logic of this function as a post-process. This could optimize the time complexity from O(n) to O(1). Leaving this enhancement as a TODO is also fine to me. +> + +Thanks for the detailed feedback! We've implemented a unified BIN_CALCULATOR UDF that consolidates all binning logic (span, bins, minspan) into a single function, which has simplified the plans. For the second enhancement, we'll add these as TODOs for future optimization. + +> The current algorithm calculates all bins number before the latest change. Although with the latest change, it still has to iterate `NICE_WIDTHS` until finding the proper one. In the worst case, it has to iterate all WIDTHS. My thought is why not calculating the `NICE_WIDTH` directly. + +For this, I have updated the implementation to calculate `NICE_WIDTH` directly. + + +### @qianheng-aws + + +LGTM except this place: https://github.com/opensearch-project/sql/pull/3878#discussion_r2287391692. Please take another look @yuancu @LantaoJin + + +### @LantaoJin + + +@qianheng-aws please take another look. + + +### @ahkcs + + +> Given the size and complexity of this PR, it's challenging for me to thoroughly review all the changes and verify every logical detail. I'll provide a preliminary approval so my change request doesn't block progress, though please count this as a half an approval only. +> +> I recommend obtaining approval from all contributors who have been heavily involved in reviewing this PR before merging. 
@ykmr1224 @qianheng-aws @LantaoJin @Swiddis + +Thanks for the review and the preliminary approval. After discussing with the team, we’ve decided to go ahead and merge this PR first, and then address any additional concerns in follow-up issues. + + +### @ykmr1224 + + +@ahkcs +Seems backport failed for this PR. +And it is causing backport failure for https://github.com/opensearch-project/sql/pull/4100 +Can you fix the backport issue? + + + +--- + +# PR #3875: Bump gradle to 8.14 and java to 24 + +**URL:** https://github.com/opensearch-project/sql/pull/3875 + +**Author:** @LantaoJin + +**Created:** 2025-07-14T03:40:58Z + +**State:** MERGED + +**Merged:** 2025-07-17T23:09:38Z + +**Changes:** +19 -22 (11 files) + +**Labels:** `dependencies` + + +## Description + +### Description +Updated gradle to 8.14 and java version to 24. + +Related dependencies dumped to support java 24: +| dependencies | before | after | +| ------------- | ------------- | ------------- | +| com.netflix.nebula.ospackage-base | 11.5.0 | 12.0.0 | +| io.freefair.lombok | 8.4 | 8.14 | +| net.bytebuddy.byte-buddy | 1.14.19 | 1.15.11 | + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +@LantaoJin Did u bump plugin jdk to 24? + + +### @peterzhuamazon + + +> @LantaoJin Did u bump plugin jdk to 24? + +Hi @penghuo if you are talking about min compat that version needs to stay at 21. + +Thanks. 
+ + +--- + +# PR #3874: [Backport 2.19-dev] Add compare_ip operator udfs (#3821) + +**URL:** https://github.com/opensearch-project/sql/pull/3874 + +**Author:** @ishaoxy + +**Created:** 2025-07-13T13:00:53Z + +**State:** MERGED + +**Merged:** 2025-07-15T06:36:08Z + +**Changes:** +353 -93 (9 files) + + +## Description + +* ip_compare operator added + + + +* only type checker issue left + + + +* fix by modifying ip.sqlTypeName from OTHER to NULL in type checker + + + +* fix less + + + +* modify the CalcitePPLFunctionTypeTest text + + + +* allow CalciteIPComparisonIT in CalciteNoPushdownIT + + + +* Modify the signature description in udf + + + +* fix some typing errors + + + +* modify the udfs for better style + + + +* Make IpComparisonOperators an inner enum of CompareIPFunction + + + +* modify registerOperator + + + +* modify registerOperator + + + +* add type checker for cidr + + + +* add javadoc + + + +* move switch case to the implement method + + + +--------- + + + + +(cherry picked from commit 6c3efa1475f815fc9025a2e3c533b7b21d9ea8cf) + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3871: [Backport 2.19-dev] Add ClickBench IT Suite (#3860) + +**URL:** https://github.com/opensearch-project/sql/pull/3871 + +**Author:** @LantaoJin + +**Created:** 2025-07-11T08:01:51Z + +**State:** MERGED + +**Merged:** 2025-07-14T23:22:08Z + +**Changes:** +962 -109 (58 files) + + +## Description + +* Add clickbench IT Suite + + + +* Add original SQL and fix the bug of Sarg point (q41) + + + +* fix IT of Sarg related + + + +* Fix UT + + + +--------- + + Backport https://github.com/opensearch-project/sql/pull/3860 +(cherry picked from commit 9af1567bbb4b04cafbfd40f460f3c0384b31dc81) + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +ping @noCharger @penghuo + + +--- + +# PR #3870: [Backport 2.19-dev] Update ppl documentation index for new functions + +**URL:** https://github.com/opensearch-project/sql/pull/3870 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-11T05:44:03Z + +**State:** MERGED + +**Merged:** 2025-07-11T09:30:18Z + +**Changes:** +6 -0 (1 files) + + +## Description + +Backport 81d3741182872b2521ee4db6f07214ecda6c7b75 from #3868. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3869: [Backport 2.19-dev] Add explain ITs with Calcite without pushdown + +**URL:** https://github.com/opensearch-project/sql/pull/3869 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-11T03:38:00Z + +**State:** MERGED + +**Merged:** 2025-07-11T05:39:56Z + +**Changes:** +269 -161 (41 files) + + +## Description + +Backport 9aaff7a87b01acdf94ab6372260db073277e4d30 from #3786. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3868: Update ppl documentation index for new functions + +**URL:** https://github.com/opensearch-project/sql/pull/3868 + +**Author:** @ps48 + +**Created:** 2025-07-10T18:56:49Z + +**State:** MERGED + +**Merged:** 2025-07-11T05:43:46Z + +**Changes:** +6 -0 (1 files) + +**Labels:** `documentation`, `backport 2.19-dev` + + +## Description + +### Description +Update ppl documentation index for new functions. There were several new functionalities added for PPL calcite engine. These are just missing from the main documentation index. + +Relevant PRs: +JSON functions: https://github.com/opensearch-project/sql/pull/3559 +Array/Collection/Multi-value functions : https://github.com/opensearch-project/sql/pull/3584 +Cypro functions: https://github.com/opensearch-project/sql/pull/3574 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @vamsimanohar + + +@LantaoJin could you please make sure we update the main index doc with any new functionality. + + + +--- + +# PR #3867: Support `reverse` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3867 + +**Author:** @selsong + +**Created:** 2025-07-10T17:04:57Z + +**State:** MERGED + +**Merged:** 2025-08-06T16:34:59Z + +**Changes:** +541 -0 (13 files) + +**Labels:** `enhancement`, `PPL`, `backport 2.19-dev`, `backport 3.2` + + +## Description + +### Description +Implement the `reverse` command in PPL to flip the result order of records. This PR only includes the grammar implementation, pushdown optimization will be raised in a 2nd followup PR. + +### Syntax +``` + | [commands] | reverse | [commands] +``` +- `reverse` takes no arguments. +- It can appear anywhere in the pipeline including the end, and will reverse the order of rows produced up to that point. +- Unlike `sort`, which allows field-based sorting, `reverse` simply inverts the row order as is. + +### Semantics +**Expected Behavior:** +- **Action**: Reverses the display order of all rows in the current result set +- **Scope**: Operates on the entire result set at the point where `reverse` appears in the pipeline +- **Data Preservation**: Does not modify field values or schema + +**Implementation Approach:** +Assigns sequential numbers to each row, then sorts by those numbers in descending order to achieve proper reversal regardless of existing sort operations. 
+ +### Example Queries + +```ppl +-- Reverse original ingestion order +source=accounts | reverse + +-- Reverse after sorting by age (youngest to oldest → oldest to youngest) +source=accounts | sort age | reverse + +-- Get the last 5 records in the original order +source=accounts | reverse | head 5 + +-- Reverse after filtering and projecting fields +source=accounts | fields name, age | reverse +``` + +### Restrictions / Limitations +- **Calcite Engine Only**: Requires plugins.query.calcite.enabled=true +- **Legacy Engine**: Throws UnsupportedOperationException when Calcite is disabled +- **Memory Overhead**: Uses row numbering and sort which may impact performance on very large result sets + +### Performance Test +- Dataset: big5 (1.16B documents, 257GB) +- Test Date: Fri Jul 25 2025 + +**Baseline Performance** +- head 1: Avg = 25ms, P90 = 26ms (39 QPS) +- head 10: Avg = 26ms, P90 = 28ms +- head 100: Avg = 32ms, P90 = 33ms + +**Reverse Command** +- reverse of head 1: Avg ≈ 38ms, P90 = 40ms +- reverse of head 10: Avg ≈ 38ms, P90 = 40ms +- reverse of head 100: Avg ≈ 44ms, P90 = 45ms +- reverse of head 9000 - 10000: ~800ms + +**Note:** +- `reverse` should only be used on smaller datasets. +- Pushdown of `reverse` is not yet implemented, so passing in a large, unfiltered dataset directly into reverse like `source=big5 | reverse | head 10` processes the entire dataset. If the dataset size passed into reverse is over 10,000 it can cause timeout by hitting the circuitbreaker. #3925 + +### Related Issues +#3873 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - COMMENTED + + +Thank you for the contribution! 
The implementation is well structured and thoroughly tested. I left a few comments for the discussion of my concerns. + + +### @penghuo - COMMENTED + + +PR looks good. +@selsong +1. Could you include the performance results from the current implementation in the PR description? +2. Could you create an issue to track the follow-up on window function pushdown? + + +### @penghuo - DISMISSED + + +@yuancu @LantaoJin please take another look. + + +### @LantaoJin - APPROVED + + +LGTM except the explain IT. + + +### @LantaoJin - CHANGES_REQUESTED + + +Missing test in `PPLQueryDataAnonymizerTest` + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +`__reverse_row_num__` can be a shared static constant + + +### @yuancu on `docs/user/ppl/cmd/reverse.rst:23` + + +May need another section "Version" to indicate it's only available from version 3.2.0 + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchLimitIndexScanRule.java:None` + + +Such check can be moved ahead to the matching rule of the config at `OpenSearchLimitIndexScanRule.Config.DEFAULT` + +Update: I found that it actually never step in to `hasRowNumberFunction` since a LogicalSort that reverses field `__row_number__` will not match this rule -- it fails the test `isLogicalSortLimit` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchLimitIndexScanRule.java:None` + + +Will the the digest of a logical sort ever contain `row_number`? You projected it as another column, it will refer it with its position instead of name. + +I tested with the index bank with the ppl: `source=bank | reverse | fields address`. The digest I get of this logical sort is `LogicalSort.NONE.[20 DESC](input=RelSubset#2967,sort0=$20,dir0=DESC)`. 
The `20` corresponds to the added column in the following logical plan: + +``` +LogicalProject(address=[$2]) + LogicalSort(sort0=[$20], dir0=[DESC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], date_value=[$4], gender=[$5], city=[$6], lastname=[$7], balance=[$8], employer=[$9], state=[$10], age=[$11], email=[$12], male=[$13], _id=[$14], _index=[$15], _score=[$16], _maxscore=[$17], _sort=[$18], _routing=[$19], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, bank]]) +``` + +The digest doesn't contain the name of the column. You need another way to examine whether it's a reverse. + + +### @penghuo on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java:12` + + +In PPL module, we usually test the parser, and verifyPPLToSparkSQL. +correctness teast should move to IT. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java:19` + + +Could u add a test in ExplainIT for reverse command? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchLimitIndexScanRule.java:None` + + +Could u explain why we need to skip limit pushdown? What is logical plan before apply rule? + + +### @selsong on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchLimitIndexScanRule.java:None` + + +Thanks for pointing this out. After revisiting the issue, I realized the bug I encountered was actually due to an unrelated change on a different local branch. You're right that the limit pushdown logic itself is valid here, so I’ve reverted the unnecessary change to limit pushdown. + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java:None` + + +In such unit tests, the correctness of the logical plan and of the Spark SQL counterpart are also usually tested. You could refer to e.g. 
[CalcitePPLDedupTest](https://github.com/opensearch-project/sql/blob/main/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java). + +Btw, there are some existing tables that can be used for testing. E.g. you called `super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL)` in the constructor, the `EMP` table is set up for testing. You can reuse them unless you need a special schema. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +I think this test is good in that it introduce minimum hard-coding strings. But adding the whole plan with `loadFromFile` also has its merits: code reader can directly refer to the generated plans to understand or check the correctness of the parsed query, without the need to execute the query by themselves. + +You can also add the full plans later when you implement the push-down of the command. + + +### @selsong on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java:None` + + +Thanks! I modified the code to use the existing EMR table. + + +### @selsong on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +Sounds good, I'll add the full loadFromFile later when I implement the pushdown. + + +### @penghuo on `docs/user/ppl/cmd/reverse.rst:25` + + +Could we add a note to call out limitation. +``` +The reverse command processes the entire dataset. If applied directly to millions of records, it will consume significant memory resources on the coordinating node. Users should only apply the reverse command to smaller datasets, typically after aggregation operations. +``` + + +### @selsong on `docs/user/ppl/cmd/reverse.rst:25` + + +Thanks! +1. Yes, performance results from current implementation have been added to the PR description. +2. Issue #3924 has been created to track pushdown. 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +`CalcitePPLExplainIT` is just for `explain` command, please use `CalciteExplainIT` instead. + + +### @penghuo on `docs/user/ppl/cmd/reverse.rst:25` + + +add to index.rst. https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/index.rst + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:374` + + +IMO, reverse by row_number should only apply to the plan (RelNode) which doesn't contain collation. For example: +`source=t | ... | sort - age | reverse` should apply sort by age asc, rather than row_number. +You can try: +``` +RelCollation collation = context.relBuilder.peek().getTraitSet().getCollation(); +if (collation == null || collation == RelCollations.EMPTY) { + // Add ROW_NUMBER() column + RexNode rowNumber = + ... +} else { + RelCollation reversedCollation = reverseCollation(collation); + context.relBuilder.sort(reversedCollation) +} +``` +Add `reverseCollation()` to PlanUtils.class +``` +public static RelCollation reverseCollation(RelCollation original) { + if (original == null || original.getFieldCollations().isEmpty()) { + return original; + } + + List reversedFields = new ArrayList<>(); + for (RelFieldCollation field : original.getFieldCollations()) { + RelFieldCollation.Direction reversedDirection = + field.direction.reverse(); + + RelFieldCollation reversedField = new RelFieldCollation( + field.getFieldIndex(), + reversedDirection, + field.nullDirection + ); + reversedFields.add(reversedField); + } + + return RelCollations.of(reversedFields); +} +``` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:374` + + +Then add some tests: +``` +source=t | ... | sort - age | reverse +source=t | ... | sort - age, + gender | reverse +source=t | ... | sort - age | reverse | reverse +source=t | ... 
| sort - age, + gender | reverse | reverse +``` + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:374` + + +@LantaoJin @selsong is it correct issue? or performance imporvement? +If it is performance improvement, we can track by issue https://github.com/opensearch-project/sql/issues/3924. And raise 2nd PR. + + +### @selsong on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:374` + + +Thanks for the suggestion! This is a performance improvement, and it's already tracked under issue [#3924](https://github.com/opensearch-project/sql/issues/3924). The current implementation still produces correct results, but applying reverseCollation where applicable would avoid unnecessary computation. I'll address this enhancement in a follow-up PR. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:374` + + +I don't think my suggestion is a pushdown enhancement, but is okey to refactor in separate PR. + + +## General Comments + + +### @LantaoJin + + +@selsong Thanks for starting contribution. ~~But I doubt that we need the `reverse` command for right now since~~ +**Update:** +1. For new PPL commands, a Feature or RFC issue is required for syntax review before pull request. +2. This restriction can be related. ~~PPL as an unified query language, new commands and functions are required to be implemented both in PPL-on-OpenSearch and [PPL-on-Spark](https://github.com/opensearch-project/opensearch-spark/tree/main/ppl-spark-integration). But this won't be a problem if https://github.com/opensearch-project/sql/issues/3734 is done. Before that, new commands should be planned in release to avoid fragmented user experience. The current workaround is implementing it in two repositories.~~ +3. ~~`reverse` could cause performance crisis when handle large dataset. 
A pushdown or restriction solution should be considered.~~ To unblock new command, performance improvement could be separated to a follow-up PR. + + +### @yuancu + + +In the RFC #3873 , you mentioned the following case can be pushed down: + +> The optimization is triggered when the logical plan has this exact shape: +> ``` +> LogicalSort (reverse sort) +> CalciteLogicalIndexScan +> ``` +> ... +> If reverse immediately follows the source, pushdown to a descending index scan results in efficient queries. + +However, in practice, they are not pushed down. For example, the physical plan of `source=bank | reverse` is as follows: + +``` +EnumerableCalc(expr#0..14=[{inputs}], proj#0..13=[{exprs}]) + EnumerableSort(sort0=[$14], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, date_value, gender, city, lastname, balance, employer, state, age, email, male]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","date_value","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` + +Moreover, I doubt whether can it be pushed down. You added a new column to implement `reverse`. However, the new column isn't present in the datasource. Unless we support pushing down window function of `row_number()`, in which case we can push down logical sort after row number altogether, there is no way for the OpenSearch engine to sort by this non-existing column. + +Nevertheless, I think it can be pushed down when there are preceding sort -- in this way, you can not rely on the added `__row_number__` column, but on the field to reverse the order of the results. 
+ +After fixing this, please kindly add tests in `CalciteExplainIT` to examine whether the generated physical plan is as expected. + + + +### @selsong + + +> 1. For new PPL commands, a Feature or RFC issue is required for syntax review before pull request. + +An RFC is linked to this PR. + +> 3. To unblock new command, performance improvement could be separated to a follow-up PR. + +Yes, pushdown will be separated to a follow-up PR. Ran performance tests for various reverse command scenarios using the big5 dataset on EC2. Performance test statistics have been updated under **Performance** to the PR description + + +### @yuancu + + +Looks good to me :) + + +--- + +# PR #3866: [Backport 2.19-dev] Skipping codegen and compile for Scan only plan + +**URL:** https://github.com/opensearch-project/sql/pull/3866 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-10T16:38:50Z + +**State:** MERGED + +**Merged:** 2025-07-11T05:40:53Z + +**Changes:** +165 -5 (5 files) + + +## Description + +Backport a3cd42e71d9e6b3df1e38651398ad5850d767db6 from #3853. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3864: Support pushdown physical sort operator to speedup SortMergeJoin + +**URL:** https://github.com/opensearch-project/sql/pull/3864 + +**Author:** @LantaoJin + +**Created:** 2025-07-10T10:12:14Z + +**State:** MERGED + +**Merged:** 2025-07-17T03:55:37Z + +**Changes:** +348 -253 (23 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +In Calcite + OpenSearch, the default physical Join operator is `EnumerableMergeJoin`: +``` +EnumerableMergeJoin +\-EnumerableSort + \-CalciteEnumerableIndexScan +\-EnumerableSort + \-CalciteEnumerableIndexScan +``` +After this patching, the `EnumerableSort` operators could be pushed down to DSL: +``` +EnumerableMergeJoin +\-CalciteEnumerableIndexScan(PushDownContext=[SORT->"join_key"]) +\-CalciteEnumerableIndexScan(PushDownContext=[SORT->"join_key"]) +``` + +In v3, a string field can enable sort pushdown if + +### Related Issues +Resolves #3863 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +this method is moved from `CalciteLogicalIndexScan` except these lines. 
@qianheng-aws + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java:None` + + +Could you elaborate more on this new field? + +Looks like it's for identify whether this `text` type has raw data. If that's the case, we should update this place as well: https://github.com/opensearch-project/sql/blob/a3cd42e71d9e6b3df1e38651398ad5850d767db6/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java#L1288-L1302 + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java:None` + + +Looks like we'd better detect whether `fielddata` firstly. And if not try to whether there is subfield of keyword type. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +Would be better to detect whether the `text` type `isFieldData()` first before finding keyword subfield, and wrapping all this logic `convertTextToKeyword`. + +And I think there is a bug in `OpenSearchTextType.convertTextToKeyword`, it doesn't make sense to always convert a field to keyword by appending string `.keyword`. It should append subfield name of keyword type, like what we implement in `PredicateAnalyzer` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java:None` + + +> Would be better to detect whether the text type isFieldData() first before finding keyword subfield + +Why? A text type could be both isFieldData and contain keyword subfield. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java:None` + + +> Could you elaborate more on this new field? + +pushdown sort to a text field neither contains keyword subfield nor fielddata=false (default value) will throw exception. 
+ +The new field is extracting the `fielddata` flag from mapping, for example: +``` + "gender": { + "type": "text" + }, + "address": { + "type": "text", + "fielddata": true + }, + "firstname": { + "type": "text", + "fielddata": true, + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } +``` +we cannot pushdown sort `gender` to DSL. + + +### @LantaoJin on `benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/MergeArrayAndObjectMapBenchmark.java:24` + + +not related. just fix a compile error in benchmarks module. + + +### @LantaoJin on `integ-test/src/test/resources/indexDefinitions/account_index_mapping.json:12` + + +`gender` in test sample data should contain keyword subfield. If else, we cannot sorting, aggregate on it. Sorting on a `fielddata` field would get unstable result. ref [link](https://github.com/opensearch-project/sql/pull/3864#discussion_r2206876085) + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java:380` + + +this lower case values in v2 were caused by grouping by a `fielddata` field. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:137` + + +We do not sort on text type without keyword subfield in v3. Even sorting on text type with fielddata=true will get unexpected order: +For example, if `gender` is a text type field with fielddata=true. + +Check following queries and their outputs in v2. 
+ +index=test1: +| gender | +| ------ | +| M a 1 | +| M b 1 | +| F c 2 | +| F d d 1 | + +`source = test1 | sort gender` returns +| gender | +| ------ | +| M a 1 | +| M b 1 | +| F d d 1 | +| F c 2 | + +`source = test1 | sort - gender` returns +| gender | +| ------ | +| M a 1 | +| M b 1 | +| F c 2 | +| F d d 1 | + + +index=test2: +| gender | +| ------ | +| F d d 1 | +| M a 1 | +| F c 2 | +| M b 1 | + +`source=test2 | sort gender ` returns +| gender | +| ------ | +| F d d 1 | +| M a 1 | +| M b 1 | +| F c 2 | + +`source = test2 | sort - gender` returns +| gender | +| ------ | +| M a 1 | +| M b 1 | +| F d d 1 | +| F c 2 | + +The logic of sorting a fielddata is 1) tokenization 2) internal sort by token 3) sort by first token +ASC: "F d d 1" =tokenization=> "f", "d", "d", "1" =internal sort by token=> "1", "f", "d", "d" =sort by first token=> "1" +DESC: "F d d 1" =tokenization=> "f", "d", "d", "1" =internal sort by token=> "f", "d", "d", "1" =sort by first token=> "f" + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java:62` + + +How do we introduce this disparity between with&without pushdown? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java:62` + + +I added "keyword" subfield for the `gender`, agg pushdown `by gender` works in caclite. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java:62` + + +Why pushdown enable impact results? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchSortIndexScanRule.java:24` + + +@LantaoJin @qianheng-aws couple questions? +1. Is OpenSearchSortIndexScanRule intended to be a physical tweak rule for EnumerableRel? +2. Do we still need Logical plan optimization rule and CalciteLogicalIndexScan? Can all existing rules be expressed as physical tweak rules for EnumerableRel? 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java:62` + + +> Why pushdown enable impact results? + +This result difference is not introduced by code changes in this PR. You can reproduce it in main branch when change the schema from +``` + "gender": { + "type": "text", + "fielddata": true + }, +``` +to +``` + "gender": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, +``` + +The three results (v2 non-pushdown, v3 non-pushdown, v3 pushdown) are different. +| v2 non-pushdown | v3 non-pushdown | v3 pushdown | +| --- | --- | --- | +| "F", "VA", 8 | "F", "NV", 8 | "F", "AR", 8 | + +But they are all correct. The rare command is not a deterministic command, it result depends on the order of return from OpenSearch and implementation (for v2). When the bucket pushdown works (by gender), the fetched data order is different with data order in non-pushdown. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchSortIndexScanRule.java:24` + + +EnumerableSort can generate from logical sort operator and logical join operator with physical SortMerge implementation. So I changed the `OpenSearchSortIndexScanRule` rule from logical layer to physical layer. It equals keeping the `OpenSearchSortIndexScanRule` in logical layer and adding a new rule for EnumerableSort + SortMergeJoin in physical layer. + +Almost pushdown rules can move to physical layer but I didn't see any benefit to do that. We need logical plan rules. some optimization should happen in logical layer instead of physical. For example a LogicalJoin operator may generate multiple physical Join operators. rule applying in logical should be more efficient. 
+ + +## General Comments + + +### @LantaoJin + + +Could you review this again?@qianheng-aws @penghuo + + +--- + +# PR #3862: [Backport 2.19-dev] Support struct field with dynamic disabled + +**URL:** https://github.com/opensearch-project/sql/pull/3862 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-09T16:21:16Z + +**State:** MERGED + +**Merged:** 2025-07-10T08:03:52Z + +**Changes:** +101 -2 (6 files) + + +## Description + +Backport 76b3ec2a6751cf445b0fa8e7dd09fd8b43112e10 from #3829. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3861: [Backport 2.x] Support struct field with dynamic disabled + +**URL:** https://github.com/opensearch-project/sql/pull/3861 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-09T16:21:13Z + +**State:** MERGED + +**Merged:** 2025-07-14T16:23:13Z + +**Changes:** +130 -3 (6 files) + + +## Description + +Backport 76b3ec2a6751cf445b0fa8e7dd09fd8b43112e10 from #3829. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3860: Add ClickBench IT Suite + +**URL:** https://github.com/opensearch-project/sql/pull/3860 + +**Author:** @LantaoJin + +**Created:** 2025-07-09T14:48:55Z + +**State:** MERGED + +**Merged:** 2025-07-11T06:44:19Z + +**Changes:** +962 -109 (58 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Similar to #3822 + +Add ClickBench (https://github.com/ClickHouse/ClickBench) to PPL IT suite. +Only one documentation in clickbench index. 
+ +It prints the end2end execution times, on my laptop is: +``` +Summary of PPLClickBenchIT: +q1: 14 ms +q10: 5 ms +q11: 7 ms +q12: 7 ms +q13: 6 ms +q14: 6 ms +q15: 6 ms +q16: 4 ms +q17: 5 ms +q18: 5 ms +q19: 7 ms +q2: 9 ms +q20: 4 ms +q21: 7 ms +q22: 8 ms +q23: 8 ms +q24: 6 ms +q25: 5 ms +q26: 5 ms +q27: 6 ms +q28: 7 ms +q3: 7 ms +q30: 35 ms +q31: 7 ms +q32: 6 ms +q33: 6 ms +q34: 4 ms +q35: 6 ms +q36: 7 ms +q37: 9 ms +q38: 9 ms +q39: 9 ms +q4: 6 ms +q40: 8 ms +q41: 8 ms +q42: 8 ms +q43: 7 ms +q5: 6 ms +q6: 5 ms +q7: 6 ms +q8: 7 ms +q9: 6 ms +Total 42 queries succeed. Average duration: 7 ms +``` + +``` +Summary of CalcitePPLClickBenchIT: +q1: 23 ms +q10: 37 ms +q11: 18 ms +q12: 28 ms +q13: 15 ms +q14: 16 ms +q15: 28 ms +q16: 15 ms +q17: 14 ms +q18: 13 ms +q19: 22 ms +q2: 23 ms +q20: 8 ms +q21: 17 ms +q22: 23 ms +q23: 32 ms +q24: 24 ms +q25: 13 ms +q26: 11 ms +q27: 12 ms +q28: 26 ms +q3: 19 ms +q30: 100 ms +q31: 22 ms +q32: 23 ms +q33: 17 ms +q34: 12 ms +q35: 17 ms +q36: 17 ms +q37: 23 ms +q38: 24 ms +q39: 22 ms +q4: 15 ms +q40: 26 ms +q41: 23 ms +q42: 21 ms +q43: 38 ms +q5: 12 ms +q6: 11 ms +q7: 10 ms +q8: 19 ms +q9: 18 ms +Total 42 queries succeed. Average duration: 21 ms +``` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @noCharger - APPROVED + + +Thanks for the change. + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java:None` + + +q29: REGEXP_REPLACE() is not supported. +q43: DATE_TRUNC() is not supported. 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/CalcitePPLClickBenchIT.java:None` + + +q29: REGEXP_REPLACE() is not supported. +q41: Failed in Calcite engine, TBD +q43: DATE_TRUNC() is not supported. + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q22.ppl:8` + + +Can not push down min aggregation on TEXT field in OpenSearch, commented it out. + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q43.ppl:None` + + +should be resolved by https://github.com/opensearch-project/sql/pull/3831 + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/CalcitePPLClickBenchIT.java:None` + + +Update: only q29 is ignored. +q29: REGEXP_REPLACE() is not supported. +q41: a bug of Sarg, fixed in latest commit +q43: DATE_TRUNC() is not supported. Using DATE_FORMAT() instead. ref [link](https://github.com/ClickHouse/ClickBench/blob/af08ebd8ba4601370d4ce49c38090ee80773e03d/mysql/queries.sql#L43) + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q43.ppl:2` + + +use `DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00')` instead of `DATE_TRUNC('minute', EventTime)`. ref [link](https://github.com/ClickHouse/ClickBench/blob/af08ebd8ba4601370d4ce49c38090ee80773e03d/mysql/queries.sql#L43) + + +### @noCharger on `integ-test/src/test/resources/clickbench/queries/q2.ppl:4` + + +In PPL, should we treat command equivalently? + +``` +source=hits | where AdvEngineID!=0 +``` + +and + +``` +source=hits AdvEngineID!=0 +``` + + + + +### @noCharger on `integ-test/src/test/resources/clickbench/queries/q25.ppl:8` + + +From the query plan, this one has a separate LimitOperator, while ` +...| head 10 | fields SearchPhrase` doesn't. Is this a bug? + + + +### @noCharger on `integ-test/src/test/resources/clickbench/queries/q39.ppl:5` + + +@penghuo can you confirm this transfer needs `streamstats` command? 
+ +``` +| sort _time| streamstats count as row_number| where row_number > 1000 AND row_number <= 1010 +``` + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q2.ppl:4` + + +> In PPL, should we treat command equivalently? +> +> ``` +> source=hits | where AdvEngineID!=0 +> ``` +> +> and +> +> ``` +> source=hits AdvEngineID!=0 +> ``` + +With current syntax implementation, they are totally the same. So we can rewrite the SQL to both of them. + +For SPL syntax we may follow in future, not only _logical-expression_ , but also _[index-expression](https://docs.splunk.com/Documentation/Splunk/9.4.2/SearchReference/Search#Index_expression_options)_ can follow the index as well. For example `index=hits 'abc'` equals to `source=hits query_string(..., 'abc')` + +To clarify the difference, I prefer to translate SQL `WHERE AdvEngineID <> 0` to PPL `| where AdvEngineID!=0 `. + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q25.ppl:8` + + +> From the query plan, this one has a separate LimitOperator, while ` ...| head 10 | fields SearchPhrase` doesn't. Is this a bug? + +I think it lack of a proper optimize rule in v2's optimizer. You can try explain it in enabling calcite, in v3, `sort`, `project`, `limit` are all pushed down (only TableScan in physical plan). + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q25.ppl:8` + + +Besides, about the query +``` +source=hits +| head 10 +| where SearchPhrase != '' +| sort EventTime +| fields SearchPhrase +``` +IMO, the plan in v2 is incorrect. + +In v3, the sort won't be pushed down, because in SQL `SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;` the `LIMIT 10` always executes after `ORDER BY`, [ref](https://www.geeksforgeeks.org/sql/order-of-execution-of-sql-queries/). But the PPL result should be piped, so the sort operator should only take effort on output of `.. | head 10 | where ...| `. 
+ +``` +source=hits +| head 10 +| where SearchPhrase != '' +| sort EventTime +| fields SearchPhrase +``` +should equal to +``` +SELECT SearchPhrase FROM ( + SELECT * FROM hits WHERE SearchPhrase <> '' LIMIT 10 +)t ORDER BY EventTime; +``` + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q39.ppl:5` + + +No. `streamstats count as row_number` should be similar to SQL `COUNT(*) OVER (ORDER BY _time)` with PRECEDING to CURRENT window frame. + + +### @LantaoJin on `integ-test/src/test/resources/clickbench/queries/q25.ppl:8` + + +I have discussed this with @penghuo , maybe user cannot realize they are different. So enable push down the `head 10` for query is TBD. +``` +source=hits +| head 10 +| where SearchPhrase != '' +| sort EventTime +| fields SearchPhrase +``` + + +### @noCharger on `integ-test/src/test/resources/clickbench/queries/q25.ppl:8` + + +Verified query plan on calcite. + + +## General Comments + + +### @penghuo + + +@noCharger please review it + + +--- + +# PR #3859: Filter script pushdown with RelJson serialization in Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3859 + +**Author:** @songkant-aws + +**Created:** 2025-07-09T10:29:53Z + +**State:** MERGED + +**Merged:** 2025-07-22T03:09:01Z + +**Changes:** +1704 -122 (49 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +The work on top of preivous Calcite filter script pushdown: https://github.com/opensearch-project/sql/pull/3812. +The issue of previous PR is it sends unsafe arbitrary Java code string to OpenSearch DataNode to compile it, leading to security concern where malicious user could use this exposed script field to run any code to attack cluster. + +In this PR, it leverages Calcite existing RexNode serialization mechanism to mitigate security issue. 
We're now transporting a logical expression representation (a RexNode), which does not allow users to supply arbitrary programming logic. The registered function calls are limited to PPLBuiltin UDFs and Calcite standard UDFs. The pushdown now works as follows: + +|-----------------Coordinator Node -----------------| +optimized RexNode -> encoded RexNode Json String + +---transport call--> + +|--------------------------------------------------------Data Node -----------------------------------------------------| +decoded RexNode Json String -> deserialize as RexNode -> translate to Linq4j expressions -> code compile expressions and run + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3379 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +Calcite related RexNode and RelDataType serialization + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +Calcite RexNode and RelDataType deserialization related logic + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/OpenSearchRelInputTranslator.java:None` + + +Customized handler to handle RexNode's RelInput + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +It brings me that we can leverage jackson-dataformat-msgpack which is a Jackson extension library to serialize the json to binary. (could be faster and memory saving) +``` +private static final ObjectMapper mapper = new ObjectMapper(new MessagePackFactory()); +``` + +Ref https://github.com/msgpack/msgpack-java/blob/main/msgpack-jackson/README.md + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/OpenSearchRelInputTranslator.java:None` + + +We can remove this handling logic or throw exception if we won't really have local reference. + +It's inappropriate to put input's RowType in local ref + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/OpenSearchRelInputTranslator.java:None` + + +Yes, we can remove it. I don't see we have such LocalRef case. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +I checked the Calcite RelJson code. Unfortunately, it doesn't rely on ObjectMapper. It implements its own JsonBuilder to write RexNode to json string. So we cannot leverage this msgpack extension. 
+ + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java:179` + + +Change to more complex ppl query because now most of EVAL filters can be pushed down as script expression. Adding agg before filters will prevent them pushdown. + + +### @penghuo on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:442` + + +can we avoid compare all function properties? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.json:None` + + +`SCRIPT->AND(=($0, 'Amber'), =(-($1, 2), 30))` seems not the final plan, we need https://github.com/opensearch-project/sql/pull/3850 first. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:438` + + +please add the test: +`source=opensearch-sql_test_index_account | where firstname != ''` + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CompoundedScriptEngine.java:27` + + +> just like build-in painless language + +what is user interface looks like in rest api? + + +### @penghuo on `plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java:None` + + +RexOptCluster can be consturcted in CompoundedScriptEngine? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CompoundedScriptEngine.java:None` + + +Can we get rid of options? The concern is we can not run PPL library as a client out of cluster. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +Does this class copy from Calcite? If not, add doc to explain class and method, add UT to test key features. 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +serialization -> serde + + +### @penghuo on `opensearch/build.gradle:None` + + +Calcite already depend on janino https://mvnrepository.com/artifact/org.apache.calcite/calcite-core/1.40.0 + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:223` + + +if getValidatedReferenceNameAndType throw UnsupportedScriptException, fallback to no-pushdown logic? + + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:621` + + +Add UT. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +Does it means user can any valid function in this List? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +why use analyze_ as function name? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +fieldTypes should only include filed used in RexNode, right? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +Is Base64 encoding necessary? Did we actually implement a separate script execution based on RexNode? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:135` + + +How are RexExecutable and RexExecutorImpl used in Calcite? Do they follow the same execution path as code generation? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CompoundedScriptEngine.java:None` + + +We can make the Calcite script engine as the default engine type. 
Then rest client can invoke it as well without passing any options + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CompoundedScriptEngine.java:27` + + +rest api doesn't expose ENGINE_TYPE option yet. rest api hardcodes default language as painless script. If user specifies ENGINE_TYPE, he will get exception now. + +This class's option selection is for internal V2 fallback in case of unsupported queries in Calcite for NodeClient. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:135` + + +They are used to reduce constant expressions in Calcite. + +If we are talking about the whole plan code-gen, that is more complicated since it includes code-gen for RelNode. But the path of code-gen for RexNode are the same by using `RexToLixTranslator` + + +### @qianheng-aws on `opensearch/build.gradle:None` + + +Yeah, it's not necessary anymore after this change: https://github.com/opensearch-project/sql/pull/3859/commits/bc6ecac06599b1646a460d0846900cdcb953b777#diff-59ef715f591a9e9937469905395d8ef89b941c5098176d0906148f608dc81743L80 + +It's necessary before since I want to import the class of janino in our implementation. This change makes janino only necessary in runtime. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serialization/RelJsonSerializer.java:None` + + +Yes, for simplicity. If we want least restrictive function access, we can manually pick needed functions from those libraries to a separate operator table. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java:223` + + +Yes, it will fallback to no-pushdown logic. + + +## General Comments + + +### @LantaoJin + + +@songkant-aws I remember you mentioned "Calcite has the RexNode serialization related implementation", can you point it out here? 
+ + +### @songkant-aws + + +@LantaoJin See comments inline for Calcite RexNode serialization usage by JSON. + + +### @songkant-aws + + +There is no much difference for local Big5IT benchmark report because Big5 ITs don't have script pushdown case. + +CalciteBig5IT: + +Summary: +query_string_on_message_filtered: 153 ms +sort_numeric_asc_with_match: 42 ms +terms_significant_2: 96 ms +desc_sort_timestamp_can_match_shortcut: 22 ms +query_string_on_message_filtered_sorted_num: 43 ms +terms_significant_1: 23 ms +sort_numeric_desc: 15 ms +range_numeric: 16 ms +sort_keyword_can_match_shortcut: 16 ms +desc_sort_with_after_timestamp: 12 ms +range_field_disjunction_big_range_small_term_query: 18 ms +composite_terms_keyword: 23 ms +keyword_in_range: 28 ms +asc_sort_timestamp_no_can_match_shortcut: 15 ms +asc_sort_with_after_timestamp: 11 ms +composite_terms: 19 ms +desc_sort_timestamp: 10 ms +range_with_desc_sort: 15 ms +composite_date_histogram_daily: 30 ms +range: 13 ms +sort_numeric_asc: 11 ms +query_string_on_message: 12 ms +default: 8 ms +date_histogram_hourly_agg: 12 ms +range_with_asc_sort: 16 ms +asc_sort_timestamp_can_match_shortcut: 14 ms +term: 9 ms +multi_terms_keyword: 23 ms +sort_keyword_no_can_match_shortcut: 12 ms +desc_sort_timestamp_no_can_match_shortcut: 12 ms +range_field_conjunction_big_range_big_term_query: 13 ms +date_histogram_minute_agg: 16 ms +sort_numeric_desc_with_match: 13 ms +range_field_conjunction_small_range_big_term_query: 9 ms +asc_sort_timestamp: 9 ms +keyword_terms: 12 ms +scroll: 7 ms +range_field_conjunction_small_range_small_term_query: 10 ms +keyword_terms_low_cardinality: 12 ms +Total 39 queries succeed. 
Average duration: 21 ms + +PPLBig5IT: + +Summary: +asc_sort_timestamp: 25 ms +range_field_conjunction_small_range_small_term_query: 69 ms +term: 9 ms +composite_terms_keyword: 60 ms +sort_numeric_desc_with_match: 20 ms +range_numeric: 8 ms +terms_significant_2: 11 ms +desc_sort_timestamp_no_can_match_shortcut: 6 ms +sort_numeric_asc: 6 ms +composite_date_histogram_daily: 20 ms +desc_sort_with_after_timestamp: 6 ms +keyword_terms_low_cardinality: 11 ms +keyword_in_range: 8 ms +range_field_disjunction_big_range_small_term_query: 8 ms +range_with_asc_sort: 8 ms +scroll: 7 ms +asc_sort_timestamp_can_match_shortcut: 5 ms +query_string_on_message_filtered: 30 ms +default: 5 ms +date_histogram_hourly_agg: 6 ms +range_field_conjunction_big_range_big_term_query: 6 ms +query_string_on_message: 6 ms +range: 6 ms +keyword_terms: 6 ms +range_field_conjunction_small_range_big_term_query: 6 ms +composite_terms: 13 ms +multi_terms_keyword: 8 ms +desc_sort_timestamp: 5 ms +asc_sort_timestamp_no_can_match_shortcut: 4 ms +sort_numeric_asc_with_match: 3 ms +terms_significant_1: 6 ms +range_with_desc_sort: 7 ms +sort_keyword_no_can_match_shortcut: 4 ms +query_string_on_message_filtered_sorted_num: 6 ms +desc_sort_timestamp_can_match_shortcut: 5 ms +sort_keyword_can_match_shortcut: 4 ms +sort_numeric_desc: 5 ms +asc_sort_with_after_timestamp: 5 ms +date_histogram_minute_agg: 7 ms +Total 39 queries succeed. Average duration: 11 ms + + +### @LantaoJin + + +> There is no much difference for local Big5IT benchmark report because Big5 ITs don't have script pushdown case. + +Maybe the optimization could impact clickbench queries (#3860) + + +### @LantaoJin + + +High-level question: have you solved how to balance the cost computing for both script and filter pushdown case? +For example, the [q11](https://github.com/opensearch-project/sql/pull/3860/files#diff-d5d82b49db77d27d281186e1368dac6cdee45b2ac74c0da236ed2f186f00b696) can both be effect by script and filter pushdown. 
+ + +### @penghuo + + +> There is no much difference for local Big5IT benchmark report because Big5 ITs don't have script pushdown case. + +Do we have benchmark results for queries with script pushdown? If so, what are the results?" + + + + +### @songkant-aws + + +@LantaoJin @penghuo Here is the ClickBenchIT result. + +**Four queries in ClickBenchIT hit Calcite script pushdown** + +**Five rounds query before script pushdown:** + +q21: 19ms 18ms 18ms 19ms 18ms +q22: 25ms 26ms 25ms 25ms 25ms +q23: 34ms 34ms 33ms 35ms 35ms +q24: 25ms 25ms 25ms 25ms 25ms + +**Five rounds query after script pushdown:** + +q21: 14ms 14ms 14ms 14ms 14ms +q22: 24ms 23ms 22ms 22ms 25ms +q23: 27ms 28ms 27ms 28ms 26ms +q24: 23ms 20ms 23ms 22ms 21ms + +``` +| Avg diff | Percentage change | +| q21: 18.4ms -> 14ms | -23.9% | +| q22: 25.2ms -> 23.2ms | -7.9% | +| q23: 34.2ms -> 27.2ms | -20.5% | +| q24: 25ms -> 21.8ms | -12.8% | +``` + +**The overall ClickBenchIT Summary** + +``` +CalcitePPLClickBenchIT Summary: + +q1: 21 ms +q10: 36 ms +q11: 21 ms +q12: 39 ms +q13: 17 ms +q14: 17 ms +q15: 18 ms +q16: 15 ms +q17: 16 ms +q18: 12 ms +q19: 21 ms +q2: 23 ms +q20: 9 ms +q21: 14 ms +q22: 25 ms +q23: 26 ms +q24: 21 ms +q25: 13 ms +q26: 11 ms +q27: 12 ms +q28: 27 ms +q3: 20 ms +q30: 103 ms +q31: 23 ms +q32: 23 ms +q33: 18 ms +q34: 11 ms +q35: 18 ms +q36: 17 ms +q37: 23 ms +q38: 22 ms +q39: 22 ms +q4: 16 ms +q40: 27 ms +q41: 37 ms +q42: 22 ms +q43: 25 ms +q5: 13 ms +q6: 11 ms +q7: 11 ms +q8: 19 ms +q9: 19 ms +Total 42 queries succeed. 
Average duration: 21 ms +``` + +``` +PPLClickBenchIT Summary: + +q1: 13 ms +q10: 5 ms +q11: 8 ms +q12: 6 ms +q13: 7 ms +q14: 7 ms +q15: 6 ms +q16: 4 ms +q17: 5 ms +q18: 4 ms +q19: 8 ms +q2: 8 ms +q20: 5 ms +q21: 5 ms +q22: 8 ms +q23: 10 ms +q24: 6 ms +q25: 6 ms +q26: 6 ms +q27: 6 ms +q28: 8 ms +q3: 7 ms +q30: 46 ms +q31: 5 ms +q32: 5 ms +q33: 5 ms +q34: 4 ms +q35: 6 ms +q36: 7 ms +q37: 17 ms +q38: 9 ms +q39: 11 ms +q4: 6 ms +q40: 8 ms +q41: 8 ms +q42: 8 ms +q43: 7 ms +q5: 5 ms +q6: 7 ms +q7: 5 ms +q8: 7 ms +q9: 7 ms +Total 42 queries succeed. Average duration: 7 ms +``` + + +### @songkant-aws + + +> High-level question: have you solved how to balance the cost computing for both script and filter pushdown case? +> For example, the [q11](https://github.com/opensearch-project/sql/pull/3860/files#diff-d5d82b49db77d27d281186e1368dac6cdee45b2ac74c0da236ed2f186f00b696) can both be effect by script and filter pushdown. + +Not yet. Maybe we can do a followup to understand which case should prefer filter to script. For now, if filters can be translated to term query, they are preferred anyway. But I'm not sure if there is a case of translatable filter fallback to script. + + +### @qianheng-aws + + +LGTM for this PR. + +And there are 2 followups in my mind: +1. Cost computing for script push down and primitive push down, see more info in this comment: https://github.com/opensearch-project/sql/issues/3379#issuecomment-3051689591 +2. Combing multiple script query expressions into one, if there are. I think it should be inefficiency to compile and execute them separately. + + +### @LantaoJin + + +> 1. 
Cost computing for script push down and primitive push down, see more info in this comment: [[FEATURE] Calcite Engine Framework: Pushdown scripts #3379 (comment)](https://github.com/opensearch-project/sql/issues/3379#issuecomment-3051689591) + +Before going into details: as I mentioned in https://github.com/opensearch-project/sql/pull/3859#issuecomment-3056832284, I am not sure we can defer the cost computing to a follow-up; without it, the feature could cause a performance regression. + + +### @songkant-aws + + +Added jmh benchmark for expression serialization + +``` +# JMH version: 1.35 +# VM version: JDK 21.0.5, OpenJDK 64-Bit Server VM, 21.0.5+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/amazon-corretto-21.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/songkant/workspace/opensearch-project-dev/sql/benchmarks/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: compiler (auto-detected, use -Djmh.blackhole.autoDetect=false to disable) +# Warmup: 1 iterations, 10 s each +# Measurement: 10 iterations, 10 s each +# Timeout: 10 min per iteration +# Threads: 1 thread, will synchronize iterations +# Benchmark mode: Average time, time/op +# Benchmark: org.opensearch.sql.expression.operator.predicate.ExpressionScriptSerdeBenchmark.testRexNodeJsonSerde + +# Run progress: 74.14% complete, ETA 00:05:01 +# Fork: 1 of 1 +# Warmup Iteration 1: SLF4J(W): No SLF4J providers were found. +SLF4J(W): Defaulting to no-operation (NOP) logger implementation +SLF4J(W): See https://www.slf4j.org/codes.html#noProviders for further details. 
+18048.349 ns/op +Iteration 1: 15960.460 ns/op +Iteration 2: 15886.672 ns/op[14m 57s] +Iteration 3: 15931.590 ns/op[15m 7s] +Iteration 4: 15969.726 ns/op[15m 17s] +Iteration 5: 16194.336 ns/op[15m 27s] +Iteration 6: 16280.464 ns/op[15m 37s] +Iteration 7: 15936.191 ns/op[15m 47s] +Iteration 8: 16003.951 ns/op[15m 57s] +Iteration 9: 15977.911 ns/op[16m 7s] +Iteration 10: 15953.869 ns/op[16m 17s] + + +Result "org.opensearch.sql.expression.operator.predicate.ExpressionScriptSerdeBenchmark.testRexNodeJsonSerde": + 16009.517 ±(99.9%) 190.056 ns/op [Average] + (min, avg, max) = (15886.672, 16009.517, 16280.464), stdev = 125.710 + CI (99.9%): [15819.462, 16199.573] (assumes normal distribution) + + +# JMH version: 1.35 +# VM version: JDK 21.0.5, OpenJDK 64-Bit Server VM, 21.0.5+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/amazon-corretto-21.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/songkant/workspace/opensearch-project-dev/sql/benchmarks/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: compiler (auto-detected, use -Djmh.blackhole.autoDetect=false to disable) +# Warmup: 1 iterations, 10 s each +# Measurement: 10 iterations, 10 s each +# Timeout: 10 min per iteration +# Threads: 1 thread, will synchronize iterations +# Benchmark mode: Average time, time/op +# Benchmark: org.opensearch.sql.expression.operator.predicate.ExpressionScriptSerdeBenchmark.testV2ExpressionSerde + +# Run progress: 83.62% complete, ETA 00:03:10 +# Fork: 1 of 1 +# Warmup Iteration 1: 76917.475 ns/op +Iteration 1: 73595.154 ns/op[16m 37s] +Iteration 2: 73156.246 ns/op[16m 47s] +Iteration 3: 73528.585 ns/op[16m 57s] +Iteration 4: 73522.270 ns/op[17m 7s] +Iteration 5: 73298.841 ns/op[17m 17s] +Iteration 6: 73564.956 ns/op[17m 27s] +Iteration 7: 75099.827 ns/op[17m 37s] +Iteration 8: 73680.223 ns/op[17m 47s] +Iteration 9: 73437.472 ns/op[17m 57s] +Iteration 10: 75394.735 ns/op[18m 7s] + + +Result 
"org.opensearch.sql.expression.operator.predicate.ExpressionScriptSerdeBenchmark.testV2ExpressionSerde": + 73827.831 ±(99.9%) 1158.471 ns/op [Average] + (min, avg, max) = (73156.246, 73827.831, 75394.735), stdev = 766.257 + CI (99.9%): [72669.360, 74986.302] (assumes normal distribution) +``` + + + +### @LantaoJin + + +@songkant-aws can you fix the conflicts, I think the enhancement has been blocked for too long. @penghuo @qianheng-aws any other concerns? can we merge it first? + + +### @qianheng-aws + + +LGTM. Please resolve the conflict @songkant-aws + + +### @LantaoJin + + +@songkant-aws please manually backport it to 2.19-dev + + +--- + +# PR #3858: [Backport 2.19-dev] [BugFix] Fix relevance query function over optimization issue in ReduceExpressionsRule + +**URL:** https://github.com/opensearch-project/sql/pull/3858 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-09T06:13:51Z + +**State:** MERGED + +**Merged:** 2025-07-09T17:02:13Z + +**Changes:** +162 -87 (12 files) + + +## Description + +Backport fed92a33fb3cd3883ac95fa5dfa1501b16233c87 from #3851. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@songkant-aws can you check the CI failure + + +### @songkant-aws + + +@LantaoJin Related to flaky test issue. Should be addressed by this PR: https://github.com/opensearch-project/sql/pull/3846 but it's not merged yet. + + +### @LantaoJin + + +> @LantaoJin Related to flaky test issue. Should be addressed by this PR: #3846 but it's not merged yet. + +Thanks, let me retest. 
+ + +--- + +# PR #3857: [Backport 2.19-dev] Add big5 to IT Suite + +**URL:** https://github.com/opensearch-project/sql/pull/3857 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-08T18:12:30Z + +**State:** MERGED + +**Merged:** 2025-07-09T03:15:00Z + +**Changes:** +722 -11 (49 files) + + +## Description + +Backport d94e4de38b5ab2cc5959c1375324b92361b91bc8 from #3822. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3854: Default to UTC for date/time functions across PPL and SQL + +**URL:** https://github.com/opensearch-project/sql/pull/3854 + +**Author:** @yuancu + +**Created:** 2025-07-07T10:46:35Z + +**State:** MERGED + +**Merged:** 2025-07-23T08:35:25Z + +**Changes:** +157 -94 (18 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +Datetime functions in PPL/SQL have been using the cluster's JVM time zone as the default time zone. For example, if the cluster resides in PST and it's 13:00:00 there, `SELECT CURRENT_TIME()` will return the current timestamp as `13:00:00`. + +However, this creates a mismatch with the data stored in OpenSearch, as [dates are internally stored in the UTC format](https://docs.opensearch.org/docs/latest/field-types/supported-field-types/date/). Therefore, when comparing dates generated with functions with those stored in OpenSearch, there will be a mismatch in their time zones. Examples are detailed in #3725. + +This PR unifies all date / time representations in PPL / SQL to UTC. As a result, `SELECT CURRENT_TIME()` will return the current time in the UTC time zone; in the query `... | eval t = timestamp('2025-07-07 13:00:00') | where date > t`, `t` will be regarded as `2025-07-07T13:00:00.000Z` in UTC.
+ +This will fix existing problems of comparing with now, where the current zoned timestamp in the cluster's time zone will be directly compared with an UTC timestamp from the index. + +As the next step, we should allow users to specify timezone per query or cluster-wise. + +### Alternative solutions that I have considered + +We can still use zoned time in PPL / SQL, but only make the conversion when interacting with the data from the index. This includes: +- Convert the UTC date to cluster timezone when read from OpenSearch +- When comparing with `now()` or literal, pushing down the time zone as a parameter as well (or convert the now / literal to UTC before pushing down). +- ... + +I did not implement this solution since users will read a data that's different from what they stored, and they have no control over it. + +### Comparison with other databases / languages + +- Most SQL databases have session controls. Date/time are returned in the session's time zone. If time zone not specified, dates will be returned in UTC time zone. +- Elasticsearch supports `time_zone` argument in some DSL queries like range query, which will convert the date/time in the query to UTC before comparing with dates in the index. If not specified, all dates are regarded as in UTC time zone. E.g. `SELECT NOW()` will return a UTC time. +- [Splunk treats date/time literals that did not specify a time zone as in cluster's time zone](https://help.splunk.com/en/splunk-cloud-platform/search/spl2-search-manual/dates-and-time/time-zones). Date / time are converted to UNIX time for processing. + +### Related Issues +Resolves #3725 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `docs/user/ppl/functions/datetime.rst:1313` + + +I don't think this changes should impact any SYS* datetime functions. SYSDATE() should be expressed in the same timezone as the server is on. + +We want to change the current_timestamp/now to UTC since we don't have a session in OS. But would not change the SYS* functions. ref https://database-heartbeat.com/2021/09/28/sysdate-vs-current_date-in-oracle-database/ + + +### @LantaoJin on `docs/user/ppl/functions/datetime.rst:1314` + + +we need update user doc to highlight the new behaviours for any impacted functions. for example, `now` + + +### @yuancu on `docs/user/ppl/functions/datetime.rst:1313` + + +Set `sysdate` to use cluster time zone. + +Originally, `sysdate` only differs with `now` in that it returns execution timestamp instead of query start timestamp. Now it differs with `now` in one more way (time zone). I've made it clear in the doc. Is this change expected? + +Two more functions that worry me are `localtime` and `localtimestamp`. They'll return UTC timestamp instead of *local* timestamp. This may be confusing for users. I think it's because we followed [the function definition of MySql](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_localtimestamp), where it says *[LOCALTIME](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_localtime) and [LOCALTIME()](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_localtime) are synonyms for [NOW()](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_now)*. However, since we have altered the definition of `now`, the `local*` functions no longer points to the *local* time zone. + + +### @yuancu on `docs/user/ppl/functions/datetime.rst:1314` + + +Fixed. Explicitly mentioned UTC time-zone for all now-like functions. 
+ + +### @LantaoJin on `docs/user/ppl/functions/datetime.rst:1313` + + +It's okey as long as keep the behaviours of localtime() as same as now(). maybe a better way is marking local* functions deprecated since now we don't have any session management. + + +### @penghuo on `docs/user/ppl/functions/datetime.rst:None` + + +rephrase it, --> CURDATE() returns the current date in UTC at the time the statement is executed. + + +### @penghuo on `docs/user/ppl/functions/datetime.rst:250` + + +We should add a Note section at Top of datatime to describe UTC behaviour. + + +### @penghuo on `docs/user/ppl/functions/datetime.rst:None` + + +> SYSDATE() returns the time at which it executes in the cluster's time zone. + +It should returns the current date in UTC. + + +### @penghuo on `core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeTest.java:57` + + +Timezone "America/Los_Angeles" is only for testing right? By default DSL.datetime always use UTC timezone? + + +### @yuancu on `core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeTest.java:57` + + +Yes, "America/Los_Angeles" is only for testing. `DSL.datetime(datetime, timezone)` converts the datetime (assumed to be in UTC) to the specified timezone. If there isn't a timezone specified -- `DSL.datetime(datetime)`, it uses UTC timezone. + + +### @yuancu on `docs/user/ppl/functions/datetime.rst:None` + + +@LantaoJin [argued](https://github.com/opensearch-project/sql/pull/3854#discussion_r2191394393) that `SYSDATE` should return the current date at server's time zone, as how [Oracle's SYSDATE ](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/SYSDATE.html)does. + +In [MySql](https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_sysdate), SYSDATE does not differ with NOW w.r.t. timezone. + + +### @yuancu on `docs/user/ppl/functions/datetime.rst:250` + + +Added a block with the node directive. Do you think it is detailed enough? 
+ + +### @yuancu on `docs/user/ppl/functions/datetime.rst:None` + + +Fixed + + +### @yuancu on `core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeTest.java:57` + + +One proposal related to this test: I think `ExprTimestampValue` should hold a `LocalTimestamp` instead of an `Instant`, since it just represents a timestamp without zone information. + +For example, the function `datetime(datetime, timezone)` converts a timestamp to another timezone. But if representing the input and output timestamp with an `Instant`, it will remain the same. In the code, there are also many spoiler plates to add a timezone to a LocalDateTime in order to work with ExprTimestampValue. + + +### @LantaoJin on `docs/user/ppl/functions/datetime.rst:None` + + +I have checked the behaviours in Oracle Autonomous Database. The default time zone is UTC and by default calls to SYSDATE and SYSTIMESTAMP return the date and time in UTC. [ref](https://docs.oracle.com/en/cloud/paas/autonomous-database/serverless/adbsb/autonomous-initialization-parameters.html#GUID-1D5E830F-2986-4E6C-AF8F-899AC3C85D07). let's update it to UTC. @yuancu + + +### @yuancu on `docs/user/ppl/functions/datetime.rst:None` + + +Updated the implementation. + +Besides, I found a bug which may be annoying for `SYSDATE` users -- multiple calls to `SYSDATE` with the same parameter within a query will return the same value. + +`SYSDATE` is expected to return different values when it is called within the same query. +E.g. 
`sd1` and `sd2` are different in this query: `source=dates | eval sd1 = sysdate(5) | sort date | eval sd2 = sysdate(6) | fields sd1, sd2` + +| sd1 | sd2 | +| ----| ---- | +| 2025-07-22 06:16:33.13648 | 2025-07-22 06:16:33.136559 | + +However, this query gives the same result: `source=dates | eval sd1 = sysdate(6) | sort date | eval sd2 = sysdate(6) | fields sd1, sd2` + +| sd1 | sd2 | +| ----| ---- | +| 2025-07-22 06:19:07.006268 | 2025-07-22 06:19:07.006268 | + +The physical plan gives a hint to the problem: +``` +EnumerableCalc(expr#0=[{inputs}], expr#1=[6], expr#2=[SYSDATE($t1)], sd1=[$t2], sd2=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, dates]], PushDownContext=[[PROJECT->[date], SORT->[{ + "date" : { + "order" : "asc", + "missing" : "_first" + } +}]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["date"],"excludes":[]},"sort":[{"date":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` + +Both `sd1` and `sd2` point to `$t2`. + +Will raise another issue to fix it. + + +### @penghuo on `docs/user/ppl/functions/datetime.rst:None` + + +> unless otherwise specified? + +In which case? + + +### @yuancu on `docs/user/ppl/functions/datetime.rst:None` + + +Thanks for reminding. `SYSDATE` was an exception. Now we changed it to UTC as well. I'll rephrase the note to "All PPL date and time functions use the UTC time zone" + + +## General Comments + + +### @penghuo + + +> This will be a break change, and may be counter-intuitive for some since users will get a timestamp that's different from their local timestamp. + +Could we avoid breaking change by providing timezone setting? e.g. default value is jvm timezone. + +> As the next step, we should allow users to specify timezone per query or per session. + +Timezone query paramater will overide timezone setting, right? 
+ +> Default to UTC for date/time functions across PPL and SQL + +I agree, timestamp Intenal representation should always be epoch value. +my proposal is apply timezone for display and parsing timestamp string literal only + + +### @yuancu + + +> Timezone query paramater will overide timezone setting, right? + +Yes + +> My proposal is apply timezone for display and parsing timestamp string literal only + +In this case, if a user feed in a timestamp string, should we regard it as a UTC timestamp or a timestamp at user's time zone? + +I feel it will be more intuitive to treat it as in user's time zone. Since a user will get what they stores when they read it back from the index. + +``` + -------convert---> | ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄| +user (local) | index (UTC) | + <----convert back- |______________| +``` + +The downside: It makes the user interface different from what's stored in the index. This may be a little weird for a query language. + +> Could we avoid breaking change by providing timezone setting? e.g. default value is jvm timezone. + +I am considering adding such a setting. There may be three ways to achieve so: +- setting per query +- setting per session +- a cluster-wide setting + +They all have their merits and disadvantages. It may be too verbose or troublesome if a user has to specify a timezone per query. Enabling cluster-wise setting allows users to specify timezone once and use it forever, although clients from different time zones have to use the same time-zone set by the server. Using session-timezone / profile-timezone may be a great middle ground. However, introducing such an involved concept purely for timezone settings may be an overkill. + +Of course, we can use a combination of them -- e.g. enable cluster-wide timezone setting, but also support overriding it with query parameters. 
+ + +--- + +# PR #3853: Skipping codegen and compile for Scan only plan + +**URL:** https://github.com/opensearch-project/sql/pull/3853 + +**Author:** @LantaoJin + +**Created:** 2025-07-07T10:29:40Z + +**State:** MERGED + +**Merged:** 2025-07-10T16:38:34Z + +**Changes:** +165 -5 (5 files) + +**Labels:** `enhancement`, `performance`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Currently, when the Calcite engine is enabled, **all query plans** -- regardless of their structure -- are converted to `EnumerableRel`. This means the plan is transformed into a `Linq4j` Expression, used to generate Java code, and put through just-in-time (JIT) compilation before execution. + +For simple or fully pushdown-compatible queries (e.g., `source=t`, `source=t | where a=1`, `source=t | sort a`, or `source=t | where a=1 | sort b | head 10`), the optimized plans contain only an `EnumerableTableScan` node. Despite their simplicity, these plans unnecessarily undergo code generation and dynamic compilation, introducing overhead. + +To reduce codegen and compilation time, we propose converting such plans to `BindableRel` instead of `EnumerableRel`. This bypasses code generation and compilation entirely, improving plan execution time by **~30%** (measured by benchmarking the simple queries from https://github.com/opensearch-project/sql/pull/3822). + +### Related Issues +Resolves #3852 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/Scannable.java:17` + + +Nit: Add javadoc to explain + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:173` + + +Nit: Looks like this method is marked as `@Deprecated`. Use withPrepareFactory() in OpenSearchDriver instead? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/plan/Scannable.java:17` + + +done + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:173` + + +Actually we cannot use `withPrepareFactory` since `withPrepareFactory` will return a `Driver` instead of `OpenSearchDriver`. We need the added `connect()` method of `OpenSearchDriver`. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:173` + + +Em, maybe we could with casting. +``` + try { + OpenSearchDriver driver = (OpenSearchDriver) new OpenSearchDriver().withPrepareFactory(OpenSearchPrepareImpl::new); + return driver.connect("jdbc:calcite:", info, null, typeFactory); + } catch (SQLException e) { + throw new RuntimeException(e); + } +``` +Anyway, let' keep current `createPrepareFactory()` unless it is removed in future. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:256` + + +add a comments if copy from calcite? e.g. +``` +// START FROM CALCITE + +// END FROM CALCITE +``` + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:300` + + +Can we leverage BindableConvention.INSTANCE and regsiter a rule for convertion? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:300` + + +No, Bindable and Enumerable conventions cannot work together. The `resultConvention` of physical plan can only be one of them. 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3851: [BugFix] Fix relevance query function over optimization issue in ReduceExpressionsRule + +**URL:** https://github.com/opensearch-project/sql/pull/3851 + +**Author:** @songkant-aws + +**Created:** 2025-07-07T02:05:30Z + +**State:** MERGED + +**Merged:** 2025-07-09T06:13:32Z + +**Changes:** +162 -87 (12 files) + +**Labels:** `bug`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Fix relevance query function over optimization issue in ReduceExpressionsRule + +### Related Issues +Related to https://github.com/opensearch-project/sql/pull/3834 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/UserDefinedFunctionBuilder.java:48` + + +can you combine the methods toUDF in L35? We don't want to keep diff implementations. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java:177` + + +why we change this IT case? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +can you move this method to `PlanUtil`? + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/expression/function/UserDefinedFunctionBuilder.java:48` + + +Done + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java:177` + + +Because I think the previous test case semantic is ambiguous `simple_query_string` can't query on target field right after an aggregation. 
Use eval operator to simulate a filter that can't be pushed down makes a lot sense. + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Done + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3850: Support partial filter push down + +**URL:** https://github.com/opensearch-project/sql/pull/3850 + +**Author:** @qianheng-aws + +**Created:** 2025-07-04T10:31:10Z + +**State:** MERGED + +**Merged:** 2025-07-18T15:31:54Z + +**Changes:** +280 -181 (8 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Support partial filter push down + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3470 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push.json:None` + + +shouldn't be `FILTER->[>=($1, 1)]` instead of `FILTER->AND(>=($1, 1), =($0, '880 Holmes Lane'))` in PushDownContext? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push.json:None` + + +It uses the original condition currently as its digest now. If we needs to use the pushed condition as its digest, we need to store that RexNode as well like non-pushed condition. Both is ok for functionality, the latter one should be more appropriate for explanation. + +Will make that change. 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:148` + + +analyze() is only for testing? + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:135` + + +nit, pushDownFilter should return newScan, if it is partial, return pair of newScan and un-pushed filters, then let IndexScanRule create new Filter node? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:148` + + +Yeah, we need QueryExpression now in our production code and invoke builder() later itself. Leave the original method only for testing. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:135` + + +Yeah + + +## General Comments + + +### @qianheng-aws + + +ping @LantaoJin @penghuo + + +### @qianheng-aws + + +Find a good case for Calcite with partial push down: + +Calcite with partial down could execute this PPL while v2 will throw exception: +``` +source=opensearch-sql_test_index_beer | eval answerId= AcceptedAnswerId + 1 | where simple_query_string(['Tags'], 'taste') and answerId > 200 +``` + +It's because Calcite could only push the relevance function into scan through `eval` command since it's unrelated to the `eval` command. It's made by `FilterProjectTransposeRule` and Partial Push Down feature. 
And the final plan is: + +``` +EnumerableProject(ParentId=[$0], CreationDate=[$1], Title=[$2], ViewCount=[$3], LastEditorUserId=[$4], ContentLicense=[$5], OwnerUserId=[$6], Score=[$7], FavoriteCount=[$8], LastActivityDate=[$9], AnswerCount=[$10], CommentCount=[$11], ClosedDate=[$12], Id=[$13], LastEditDate=[$14], PostTypeId=[$15], AcceptedAnswerId=[$16], Body=[$17], Tags=[$18], $f19=[+($16, 1)]) + EnumerableFilter(condition=[>(+($16, 1), 200)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_beer]], PushDownContext=[[PROJECT->[ParentId, CreationDate, Title, ViewCount, LastEditorUserId, ContentLicense, OwnerUserId, Score, FavoriteCount, LastActivityDate, AnswerCount, CommentCount, ClosedDate, Id, LastEditDate, PostTypeId, AcceptedAnswerId, Body, Tags], FILTER->simple_query_string(MAP('fields', MAP('Tags':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'taste':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"simple_query_string":{"query":"taste","fields":["Tags^1.0"],"flags":-1,"default_operator":"or","analyze_wildcard":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["ParentId","CreationDate","Title","ViewCount","LastEditorUserId","ContentLicense","OwnerUserId","Score","FavoriteCount","LastActivityDate","AnswerCount","CommentCount","ClosedDate","Id","LastEditDate","PostTypeId","AcceptedAnswerId","Body","Tags"],"excludes":[]},"sort":[{"_doc":{"order":"asc"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` + + +### @LantaoJin + + +@qianheng-aws please manually backport via above instructions. + + +### @qianheng-aws + + +> @qianheng-aws please manually backport via above instructions. 
+ +Backport PR is here: https://github.com/opensearch-project/sql/pull/3899 + + +--- + +# PR #3849: Support full expression in WHERE clauses + +**URL:** https://github.com/opensearch-project/sql/pull/3849 + +**Author:** @LantaoJin + +**Created:** 2025-07-04T10:01:30Z + +**State:** MERGED + +**Merged:** 2025-07-21T18:41:14Z + +**Changes:** +271 -155 (7 files) + +**Labels:** `bug`, `backport 2.x`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +PPL where clause throws syntax error for: + +- SOURCE=test | WHERE x +- SOURCE=test | WHERE x OR y +- SOURCE=test | WHERE true +- SOURCE=test | WHERE (x < 1) = (y > 1) + +### Related Issues +Resolves #3273, #3272 and #3317 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @qianheng-aws - DISMISSED + + +LGTM if above 2 limitation are acceptable. + +Another concern is that we may not able to push down such filter condition of single field in DSL. But that issue could be addressed in a separate PR. + + +## Review Comments + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +The `SEARCH` keyword changed to required since the query `describe source=t` could be ambiguous. +`describe source=t` should throw syntax error "describe source= <==== .." +But it could be matched to +``` +(SEARCH)? logicalExpression fromClause +``` +logicalExpression -> valueExpr -> fieldExpr -> `describe` +fromClause -> `source=t` + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +Not sure why we supported the syntax of `(SEARCH)? logicalExpression fromClause`. 
+ + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:447` + + +The `AND` will be required in `Where` expression. But it is still optional in Search filter expression: +``` +source=t a=1 b=c +``` +✅ + +``` +source=t | where a=1 b=c +``` +❌ (SPL cannot work either) +``` +source=t | where a=1 and b=c +``` +✅ + +This limitation is because functionArgs could be ambiguous. For example: +``` +source=t | eval f=position('substr' IN 'str') +``` +can be parsed to an incorrect syntax tree: +Screenshot 2025-07-08 at 16 39 31 + + +The correct syntax tree is + +Screenshot 2025-07-08 at 16 41 02 + + + + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +search keywords is optional, this is valid query `status=200 source=index` + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +updated, now the `logicalExpression` could be both before and after `source clause` which is similar to SPL search command. +``` +searchCommand + : (SEARCH)? (logicalExpression)* fromClause (logicalExpression)* # searchFrom +``` + + +## General Comments + + +### @LantaoJin + + +Ping @qianheng-aws @dai-chen @penghuo @Swiddis + + +### @LantaoJin + + +@penghuo this pr did some refactoring on antlr g4 file, could you take another look? There are multiple PPL new command PRs filed, to avoid conflicts, let's complete this refactoring ASAP. 
+ + +--- + +# PR #3848: [Backport 2.19-dev] Support relevance query functions pushdown implementation in Calcite (#3834) + +**URL:** https://github.com/opensearch-project/sql/pull/3848 + +**Author:** @songkant-aws + +**Created:** 2025-07-04T06:26:30Z + +**State:** MERGED + +**Merged:** 2025-07-07T03:08:30Z + +**Changes:** +877 -53 (27 files) + + +## Description + +### Description + +Support relevance query functions pushdown implementation in Calcite (opensearch-project#3834) + +* Support relevance query functions pushdown implementation + + + +* Remove unused change + + + +* Remove ITs from NoPushDownIT list + + + +* Add more UTs and javadocs + + + +* Support analyzing UnresolvedArgument RexNode and refactor code + + + +* Fix spotless check + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3847: [Backport 2.19-dev] Support filter push down for Sarg value #3840 + +**URL:** https://github.com/opensearch-project/sql/pull/3847 + +**Author:** @qianheng-aws + +**Created:** 2025-07-04T06:14:48Z + +**State:** MERGED + +**Merged:** 2025-07-08T15:22:50Z + +**Changes:** +145 -30 (5 files) + + +## Description + +### Description +backport #3840 to 2.19-dev + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Gentle ping @penghuo @dai-chen + + +--- + +# PR #3846: Allow warning header for yaml test + +**URL:** https://github.com/opensearch-project/sql/pull/3846 + +**Author:** @qianheng-aws + +**Created:** 2025-07-03T09:48:11Z + +**State:** MERGED + +**Merged:** 2025-07-09T16:26:51Z + +**Changes:** +37 -1 (11 files) + +**Labels:** `bug`, `flaky-test`, `backport 2.19-dev` + + +## Description + +### Description +Allow warning header for yaml test. Only see such failure for `3102.yml` and `3312.yml` so far. + +OpenSearchQueryRequest relies on `_id` to preserve sort location for PIT search: +https://github.com/opensearch-project/sql/blob/74b4de067cefc821055076bd108249cef777c3f9/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java#L195-L197 + +However, OpenSearch has deprecated the sort or aggregation behavior on `_id`: +https://github.com/opensearch-project/OpenSearch/blob/36c2d5930ab3c036f19760942f95ea4bdb126137/server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java#L84-L87 + +So there will be a warning message in the header of the response, but the reason why it's flaky remains unclear. This PR works around it by allowing this warning header in the yaml test. + +For long term consideration, this issue should be addressed by this one: https://github.com/opensearch-project/sql/issues/3064 + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3845 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +LGTM as a workaround. + + +### @penghuo - APPROVED + + +It should not impact DSL aggregation query. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3844: Backport codeowner from main to 2.19-dev + +**URL:** https://github.com/opensearch-project/sql/pull/3844 + +**Author:** @penghuo + +**Created:** 2025-07-02T22:42:42Z + +**State:** MERGED + +**Merged:** 2025-07-03T01:44:39Z + +**Changes:** +1 -1 (1 files) + + +## Description + +### Description +backport code owner from main to 2.19-dev to solve auto-merge does not work issue. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +While we're at it, should we fix the validation errors for the emeritus maintainers? 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3843: [Backport 2.19-dev] Correct null order for `sort` command with Calcite (#3835) + +**URL:** https://github.com/opensearch-project/sql/pull/3843 + +**Author:** @yuancu + +**Created:** 2025-07-02T07:21:16Z + +**State:** MERGED + +**Merged:** 2025-07-02T22:17:33Z + +**Changes:** +128 -145 (18 files) + + +## Description + +### Description +Backport #3835 to 2.19-dev + +### Commit Message + +* Default sort to null_first with asc, null_last with desc + + + +* Correct null order related tests + + + +--------- + + +(cherry picked from commit 58e4336ca575f812a5a0a7a7e03cc283f357ef44) + +### Related Issues +Resolves #3380 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3841: [Backport 2.19-dev] Support span push down (#3823) + +**URL:** https://github.com/opensearch-project/sql/pull/3841 + +**Author:** @qianheng-aws + +**Created:** 2025-07-02T06:41:26Z + +**State:** MERGED + +**Merged:** 2025-07-03T15:42:24Z + +**Changes:** +252 -48 (21 files) + + +## Description + +### Description +[Describe what this change achieves] + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@qianheng-aws please fix the conflicts. + + +--- + +# PR #3840: Support filter push down for Sarg value + +**URL:** https://github.com/opensearch-project/sql/pull/3840 + +**Author:** @qianheng-aws + +**Created:** 2025-07-02T06:32:24Z + +**State:** MERGED + +**Merged:** 2025-07-03T15:19:41Z + +**Changes:** +144 -30 (5 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Support filter push down for Sarg value + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3839 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:517` + + +After fallback being disabled in future, `requireNonNull` means user will meet NPE which is unrecoverable case when the `sarg` is null here. For pushdown, as an enhancement, maybe just throw `PredicateAnalyzerException`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:517` + + +We have `catch (Throwable e)` in the `analyze` method which is the outermost shell for PredicateAnalyzer. 
So it will always catch all exception including NPE and re-throw `ExpressionNotAnalyzableException`. + +And I think it's just a defensive check, `isSearchWithPoints`, `isSearchWithComplementedPoints` both has similar code. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +add isTimeStamp() function in NamedFieldExpression, similar to isTextType() + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1162` + + +what if the Sarg include Range.singleton, between logic works? +for instance, `... | where (age>20 and age< 30) or age=35 ==> Sarg(Range.closed(20, 30), Range.singleton(35))` + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1162` + + +It still use RangeQueryBuilder for point 35(or Range[35, 35]), currently and will be `Range: {from: 35, to: 35}`. + +Maybe using TermQuery will be more efficiency. Will do that change. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3838: [Backport 2.19-dev] Change compare logical when comparing date related fields with string literal + +**URL:** https://github.com/opensearch-project/sql/pull/3838 + +**Author:** @xinyual + +**Created:** 2025-07-02T06:28:01Z + +**State:** MERGED + +**Merged:** 2025-07-02T21:08:35Z + +**Changes:** +252 -21 (10 files) + + +## Description + +### Description +Backport #3798 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3837: [Backport 3.1] Update the limitation docs + +**URL:** https://github.com/opensearch-project/sql/pull/3837 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-07-01T07:23:29Z + +**State:** MERGED + +**Merged:** 2025-07-10T10:21:43Z + +**Changes:** +60 -17 (3 files) + + +## Description + +Backport 39eefc890ff1d4d7ce489536e899396df1d79042 from #3801. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3836: [Backport 2.19-dev] Support Sort pushdown (#3620) + +**URL:** https://github.com/opensearch-project/sql/pull/3836 + +**Author:** @yuancu + +**Created:** 2025-07-01T05:41:41Z + +**State:** MERGED + +**Merged:** 2025-07-01T14:11:28Z + +**Changes:** +608 -28 (27 files) + + +## Description + +### Description +Backport 3620 to 2.19-dev + +### Commit Message + +* Support Sort pushdown + + + +Copy traits to logical index scan after pushing down sort + + + +Allow pushing down multiple times + + + +* Merge multiple sort when occurs + + + +* Update trendline explain IT Additionally correct merge collation method + + + +* Re-index collations when pushing down projection + + + +* Support sort pushdown with aggregation when aggregated fields are not in sort by fields + + + +* Update explain_output.json + + + +* Support multi-sort Additionally remove sort when pushing down aggregation after adding collation by pushing down sort + + + +* Sanitize collations when sort can't be pushed down + + + +* Test explaining sort | stats avg + + + +* Test sort date + + + +* Chores: reorder util functions + + + +* Pass null direction when pushing down sort + + + +* Convert to keyword fields when sorting opensearch text 
fields + + + +* Test explaining sort after rename + + + +* Push down sort-then-limit, while only pushdown limit when limit-then-sort + + + +* Fix flaky test related to irregular number in some locales in datetime IT + + + +* Override existing collations where there are more than one order clauses + + + +--------- + + +(cherry picked from commit 870b82c137fcd640e45783792865ab523a1561ad) + + +### Related Issues +#3380 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@penghuo we don't have permission to merge. can u help to do that. + + +--- + +# PR #3835: Correct null order for `sort` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3835 + +**Author:** @yuancu + +**Created:** 2025-07-01T05:34:04Z + +**State:** MERGED + +**Merged:** 2025-07-02T05:23:33Z + +**Changes:** +128 -145 (18 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Default null/missing order of sort to: +- first when ascending +- last when descending + +This aligns with what it states in sort's documentation: +> sort <[+|-] sort-field>... +> [+|-]: optional. The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. Default: ascending order and NULL/MISSING first. 
+ +The problem was originally raised at: https://github.com/opensearch-project/sql/pull/3620#issuecomment-3021591294 + +### Related Issues +Resolves #3380 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @qianheng-aws + + +@yuancu need backport manually + + + +### @yuancu + + +> @yuancu need backport manually + +No problem + + +--- + +# PR #3834: Support relevance query functions pushdown implementation in Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3834 + +**Author:** @songkant-aws + +**Created:** 2025-07-01T03:07:21Z + +**State:** MERGED + +**Merged:** 2025-07-02T09:53:12Z + +**Changes:** +871 -52 (27 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Support relevance query functions pushdown implementation in Calcite + +### Related Issues +Resolves #3462 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java:92` + + +Modify this IT query because sort pushdown is not merged to Calcite yet. 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java:92` + + +it merged, can you rebase the code? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +is `unresolvedArgName` always lower case here? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +add the `funcName` in the exception message + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +add the `fncName` in the exception message + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +please add UT for this new logic in PredicateAnalyzerTest.java + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java:None` + + +missing javadoc + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java:None` + + +missing javadoc + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java:None` + + +can we add some UT for this method? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java:None` + + +can we add some UTs for this method? + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java:92` + + +Rebased the code but the Sort pushdown has different behavior of handling `missing_: first/last` than V2. Calcite IT still has different assertion. So I leave my change here. + +Waiting for @yuancu new PR merging. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:None` + + +Done. 
Added more UTs + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +I don't get why not replying on `visitRelevanceFieldList` to resolve all fields and then we don't need specific handing for relevance functions. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java:None` + + +Added javadoc. The method is covered by new added PredicateAnalyzer's UTs. + + +### @songkant-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java:None` + + +Added javadoc. The method is covered by new added PredicateAnalyzer's UTs. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +If they share the same implementation, maybe we don't need to distinguish them in the plan. + +Relevant: https://github.com/opensearch-project/sql/pull/3576#discussion_r2059775309 + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +The intention here is tricky. I need different function names to differentiate implementations in pushdown query builder. I think the best thing I can do here is probably to create a static RelevanceQueryFunction to reduce its instances. Thoughts? + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +As discussed, we can support UnresolvedArgument as alias(literal). Making such changes + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +Another option here is to pass function name as one of operand. I don't have strong opinion here. It's the tradeoff between readability and wired implementation + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java:None` + + +Ah I got it. 
Thanks. Then I think the current implementation is appropriate. + + +### @songkant-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java:None` + + +Addressed UnresolvedArgument change. The amount of changes is less than I expected. + + +## General Comments + + +### @penghuo + + +I don't think we should support relevance query functions directly. The concern is that it would allow users to use them anywhere a predicate expression is valid. However, the query engine does not support relevance evaluation, so it must always push down these functions to the DSL. If pushdown fails, the query will throw an exception. + +Instead, I propose supporting relevance-based queries only within the [search command](https://github.com/opensearch-project/sql/issues/3807). + + +### @songkant-aws + + +@penghuo As discussed, since V2 relevance function cannot be removed quickly. We can mark these functions deprecated once we introduce new syntax of relevance query in search command directly. + + +### @LantaoJin + + +@songkant-aws please manually backport it to 2.19-dev + + +--- + +# PR #3833: [Bug-fix] Translate JSONException to 400 instead of 500 + +**URL:** https://github.com/opensearch-project/sql/pull/3833 + +**Author:** @RyanL1997 + +**Created:** 2025-06-30T21:30:30Z + +**State:** MERGED + +**Merged:** 2025-07-07T17:48:38Z + +**Changes:** +57 -25 (4 files) + +**Labels:** `bug`, `v3.2.0` + + +## Description + +### Description +Translate JSONException to 400 instead of 500 + +### Related Issues +* Resolve https://github.com/opensearch-project/sql/issues/3832 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @dai-chen - COMMENTED + + +Meanwhile do you have error stacktrace? I'm thinking another option is to figure out why this JSON exception is not captured. + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +### @noCharger - COMMENTED + + +@RyanL1997 should this backport to 2.x and 2.19-dev? + + +## Review Comments + + +### @dai-chen on `legacy/src/test/java/org/opensearch/sql/legacy/plugin/RestSQLActionTest.java:None` + + +Thanks for adding UT. I feel this class is tightly coupled with OpenSearch core. You can either mock its dependencies or add IT instead. Ref: https://github.com/opensearch-project/sql/blob/74b4de067cefc821055076bd108249cef777c3f9/integ-test/src/test/java/org/opensearch/sql/ppl/PPLPluginIT.java#L52. We need IT for both SQL and PPL. + + +## General Comments + + +### @RyanL1997 + + +@dai-chen +> Meanwhile do you have error stacktrace? I'm thinking another option is to figure out why this JSON exception is not captured. + +Here we go: + +``` +[2025-07-02T14:54:39,361][ERROR][o.o.s.l.p.RestSqlAction ] [xxxxxxxxxxx.ant.amazon.com] ce4ecfbe-a386-4799-a482-2cce38b75e76 Server side error during query execution +org.json.JSONException: JSONObject["query"] is not a string (class org.json.JSONArray). + at org.json.JSONObject.wrongValueFormatException(JSONObject.java:2906) ~[?:?] + at org.json.JSONObject.getString(JSONObject.java:859) ~[?:?] + at org.opensearch.sql.legacy.request.SqlRequestFactory.parseSqlRequestFromPayload(SqlRequestFactory.java:59) ~[?:?] + at org.opensearch.sql.legacy.request.SqlRequestFactory.getSqlRequest(SqlRequestFactory.java:30) ~[?:?] + at org.opensearch.sql.legacy.plugin.RestSqlAction.prepareRequest(RestSqlAction.java:119) ~[?:?] + at org.opensearch.rest.BaseRestHandler.handleRequest(BaseRestHandler.java:113) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.security.filter.SecurityRestFilter$AuthczRestHandler.handleRequest(SecurityRestFilter.java:193) ~[?:?] 
+ at org.opensearch.rest.RestController.dispatchRequest(RestController.java:381) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.rest.RestController.tryAllHandlers(RestController.java:467) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.rest.RestController.dispatchRequest(RestController.java:287) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.security.ssl.http.netty.ValidatingDispatcher.dispatchRequest(ValidatingDispatcher.java:69) ~[?:?] + at org.opensearch.http.AbstractHttpServerTransport.dispatchRequest(AbstractHttpServerTransport.java:374) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.http.AbstractHttpServerTransport.handleIncomingRequest(AbstractHttpServerTransport.java:482) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.http.AbstractHttpServerTransport.incomingRequest(AbstractHttpServerTransport.java:357) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + at org.opensearch.http.netty4.Netty4HttpRequestHandler.channelRead0(Netty4HttpRequestHandler.java:56) ~[?:?] + at org.opensearch.http.netty4.Netty4HttpRequestHandler.channelRead0(Netty4HttpRequestHandler.java:42) ~[?:?] + at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at org.opensearch.http.netty4.Netty4HttpPipeliningHandler.channelRead(Netty4HttpPipeliningHandler.java:72) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[?:?] 
+ at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:107) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:107) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:107) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:102) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] 
+ at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:346) ~[?:?] + at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:318) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:289) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:107) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.handler.ssl.SslHandler.unwrap(SslHandler.java:1519) ~[?:?] + at io.netty.handler.ssl.SslHandler.decodeJdkCompatible(SslHandler.java:1377) ~[?:?] + at io.netty.handler.ssl.SslHandler.decode(SslHandler.java:1428) ~[?:?] + at io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:530) ~[?:?] 
+ at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:469) ~[?:?] + at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:290) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[?:?] + at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1357) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440) ~[?:?] + at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[?:?] + at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:868) ~[?:?] + at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166) ~[?:?] + at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:796) ~[?:?] + at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:697) ~[?:?] + at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:660) ~[?:?] + at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:562) ~[?:?] + at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:998) ~[?:?] + at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[?:?] + at java.base/java.lang.Thread.run(Thread.java:1583) [?:?] +``` + + +### @dai-chen + + +> @dai-chen +> +> > Meanwhile do you have error stacktrace? I'm thinking another option is to figure out why this JSON exception is not captured. 
+> +> Here we go: +> +> [2025-07-02T14:54:39,361][ERROR][o.o.s.l.p.RestSqlAction ] [xxxxxxxxxxx.ant.amazon.com] ce4ecfbe-a386-4799-a482-2cce38b75e76 Server side error during query execution +> org.json.JSONException: JSONObject["query"] is not a string (class org.json.JSONArray). +> at org.json.JSONObject.wrongValueFormatException(JSONObject.java:2906) ~[?:?] +> at org.json.JSONObject.getString(JSONObject.java:859) ~[?:?] +> at org.opensearch.sql.legacy.request.SqlRequestFactory.parseSqlRequestFromPayload(SqlRequestFactory.java:59) ~[?:?] +> at org.opensearch.sql.legacy.request.SqlRequestFactory.getSqlRequest(SqlRequestFactory.java:30) ~[?:?] +> at org.opensearch.sql.legacy.plugin.RestSqlAction.prepareRequest(RestSqlAction.java:119) ~[?:?] +> at org.opensearch.rest.BaseRestHandler.handleRequest(BaseRestHandler.java:113) ~[opensearch-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] + +Got it. In this case, we can capture it in parse payload method for SQL and PPL separately. The reason is this `org.json.JSONException` is very specific, I think we'd better keep exception list in `isClientError` clean. Thanks! + + + +### @RyanL1997 + + +I don't think this one can fall into either the criteria of maintenance and critical bugfix. We can if we consider to include this into the future patch of 2.19.x. 
+ + +--- + +# PR #3831: Support casting date literal to timestamp + +**URL:** https://github.com/opensearch-project/sql/pull/3831 + +**Author:** @yuancu + +**Created:** 2025-06-30T11:19:55Z + +**State:** MERGED + +**Merged:** 2025-07-22T14:10:52Z + +**Changes:** +525 -475 (22 files) + +**Labels:** `bug`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description + +Before this PR, only the following casts are supported + +| | date | time | timestamp | +|----------------|-------|-----|-----------| +| date str | ✅ | ❌ | ❌ | +| time str | ❌ | ✅ | ❌ | +| timestamp str | ✅ | ✅ | ✅ | + + +With this PR: + +| | date | time | timestamp | +|-----------------|-------|-------|-----------| +| date str | ✅ | ❌ | ✅ | +| time str | ❌ | ✅ | ❌ | +| timestamp str | ✅ | ✅ | ✅ | + +The castings that remain impossible are in place to avoid swallowing errors silently. + +**An example case it solves** +`... | where date_time > '1950-10-11' | fields date_time` +- Before this PR + - calcite + ``` + EnumerableCalc(expr#0=[{inputs}], expr#1=['1950-10-11':VARCHAR], expr#2=[TIMESTAMP($t1)], expr#3=[>($t0, $t2)], date_time=[$t0], $condition=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, dates]], PushDownContext=[[PROJECT->[date_time]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["date_time"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + ``` + Run into follow exception when executed + ```json + { + "error": { + "reason": "There was internal problem at backend", + "details": "java.sql.SQLException: exception while executing query: timestamp:1950-10-11 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]'", + "type": "RuntimeException" + }, + "status": 500 + } + ``` + - v2 + ``` + { + "error": { + "reason": "Invalid Query", + "details": "timestamp:1950-10-11 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]'", + "type": 
"SemanticCheckException" + }, + "status": 400 + } + ``` +- After this PR + - calcite + ``` + CalciteEnumerableIndexScan(table=[[OpenSearch, dates]], PushDownContext=[[PROJECT->[date_time], FILTER->>($0, '1950-10-11 00:00:00')], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"date_time":{"from":"1950-10-11T00:00:00.000Z","to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["date_time"],"excludes":[]},"sort":[{"_doc":{"order":"asc"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + ``` + - v2 + ``` + { + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[date_time]" + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=dates, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"date_time\":{\"from\":\"1950-10-11T00:00:00.000Z\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"date_time\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } + } + ``` + +**Callout**: The exception thrown when failed to parse a date/time string is changed to `ExpressionEvaluationException` (from `SemanticCheckException`). Users relying on the exception type will be affected. + +**Implementation Notes** +- With Calcite, the original way was just to make literals with types like `EXPR_TIMESTAMP`. The values inside are not validated: https://github.com/apache/calcite/blob/a6973bde8107a7c2a605a82c779a64fe28bdb5d1/core/src/main/java/org/apache/calcite/rex/RexBuilder.java#L835 +- Now we explicitly call `TIMESTAMP`, `TIME`, or `DATE` functions to cast string values. + +**Concerns** +- Calling UDFs for casting may render it impossible to e.g. 
pushdown +- Calling UDFs for casting may make the execution slower + +### Related Issues +Resolves #3728 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - COMMENTED + + +> Calling UDFs for casting may render it impossible to e.g. pushdown + +Is this still a concern if expression pushdown is supported? + +Meanwhile in V2, even though expression pushdown is available, I recall there is performance degradation when CAST is involved—queries may fall back from DSL search to expression script evaluation. Probably we can double check and see if there’s a possible workaround in V3. + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java:105` + + +Good job to support passing `null` in `verifyDataRows` + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java:441` + + +seems we are not aligning with Spark, so this is the behavior of v2 or mysql? + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java:441` + + +This was the behavior of v2. + +In MySQL, casting a time string to date will return NULL; in some other databases like Postgres, it raises an error. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/data/model/ExprTimestampValue.java:None` + + +I think `SemanticCheckException` is inappropriate here. This issue isn't related to semantics at all. Should be something like IllegalArgumentException. 
+ + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/data/model/ExprTimestampValue.java:None` + + +Shall we support casting `time str` -> `timestamp` in the future? Maybe replace `DATE_TIME_FORMATTER_VARIABLE_NANOS` with `DATE_TIME_FORMATTER_VARIABLE_NANOS_OPTIONAL` directly if that's the case. + +And I wonder if using `DateTimeFormatter` with multiple patterns would be more efficient than `DateTimeParser.parseDateOrTimestamp`, which tries parseDate and falls back to parseTimeStamp when an exception is thrown. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/data/model/ExprTimestampValue.java:None` + + +Changed to `ExpressionEvaluationException` when parsing datetime literals/strings fails. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/data/model/ExprTimestampValue.java:None` + + +Fixed by replacing `parse*Or*` calls with new `DateTimeFormatter`s + + +## General Comments + + +### @dai-chen + + +There happens to be a new issue https://github.com/opensearch-project/sql/issues/3842 related. + + +### @yuancu + + +> Is this still a concern if expression pushdown is supported? + +It should not be an issue when script pushdown is supported. Besides, if it is a literal that has to be cast, it can be pushed-down as a range query with #3798. I added an example with the physical plan in the PR description to demonstrate this. + +> Probably we can double check and see if there’s a possible workaround in V3. + +#3798 circumvents script push-down by converting the string literal to another DSL-recognizable literal then pushing down. + + + + +### @qianheng-aws + + +Not related to this issue, I found there are some inefficient implementation for the date related functions, like branch selection at runtime for `TIMESTAMP` which should be determined in compiling phase based on the type of args, and redundant transformation between String and ExprStringValue for `DATE`. I don't check all of them. 
+ + + +### @yuancu + + +One problem I discovered during testing: + +Errors thrown while executing PPL with Calcite often result in a 500 error. + +For example, the query `source=t0001 | where @timestamp > '12:00'` has the following response with Calcite: + +```json +{ + "error": { + "reason": "There was internal problem at backend", + "details": "java.sql.SQLException: exception while executing query: timestamp:12:00 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]'", + "type": "RuntimeException" + }, + "status": 500 +} +``` + +But in v2, it's: + +```json +{ + "error": { + "reason": "Invalid Query", + "details": "timestamp:12:00 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]'", + "type": "ExpressionEvaluationException" + }, + "status": 400 +} +``` + + +This is because exceptions thrown during `statement.executeQuery()` will be wrapped with `SQLException` (see [AvaticaConnection.java#L579](https://github.com/apache/calcite-avatica/blob/63942a457248076b1a500c805b43a75c11798bd8/core/src/main/java/org/apache/calcite/avatica/AvaticaConnection.java#L579)), which is then captured by us and wrapped with a `RuntimeException`: +```java + try (PreparedStatement statement = OpenSearchRelRunners.run(context, rel)) { + ResultSet result = statement.executeQuery(); + buildResultSet(result, rel.getRowType(), context.querySizeLimit, listener); + } catch (SQLException e) { + throw new RuntimeException(e); + } +``` + +**A simple fix**: retrieve the wrapped exception from the `SQLException` and re-throw it. Should I implement this? + +

    +example stacktrace + +``` +[2025-07-10T14:28:48,782][ERROR][o.o.s.p.r.RestPPLQueryAction] [7cf34de73b85] Error happened during query handling +java.lang.RuntimeException: java.sql.SQLException: exception while executing query: timestamp:12:00 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]' + at org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.lambda$execute$6(OpenSearchExecutionEngine.java:203) ~[?:?] + at java.base/java.security.AccessController.doPrivileged(AccessController.java:319) ~[?:?] + at org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.lambda$execute$7(OpenSearchExecutionEngine.java:196) ~[?:?] + at org.opensearch.sql.opensearch.client.OpenSearchNodeClient.schedule(OpenSearchNodeClient.java:193) ~[?:?] + at org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.execute(OpenSearchExecutionEngine.java:194) ~[?:?] + at org.opensearch.sql.executor.QueryService.lambda$executeWithCalcite$0(QueryService.java:107) ~[?:?] + at java.base/java.security.AccessController.doPrivileged(AccessController.java:319) ~[?:?] + at org.opensearch.sql.executor.QueryService.executeWithCalcite(QueryService.java:96) ~[?:?] + at org.opensearch.sql.executor.QueryService.execute(QueryService.java:72) ~[?:?] + at org.opensearch.sql.executor.execution.QueryPlan.execute(QueryPlan.java:69) ~[?:?] + at org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.lambda$submit$0(OpenSearchQueryManager.java:31) ~[?:?] + at org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.lambda$withCurrentContext$1(OpenSearchQueryManager.java:45) ~[?:?] + at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:916) ~[opensearch-3.1.0-SNAPSHOT.jar:3.1.0-SNAPSHOT] + at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?] + at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?] 
+ at java.base/java.lang.Thread.run(Thread.java:1575) [?:?] +Caused by: java.sql.SQLException: exception while executing query: timestamp:12:00 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]' + at org.apache.calcite.avatica.Helper.createException(Helper.java:56) ~[?:?] + at org.apache.calcite.avatica.Helper.createException(Helper.java:41) ~[?:?] + at org.apache.calcite.avatica.AvaticaConnection.executeQueryInternal(AvaticaConnection.java:579) ~[?:?] + at org.apache.calcite.avatica.AvaticaPreparedStatement.executeQuery(AvaticaPreparedStatement.java:137) ~[?:?] + at org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.lambda$execute$6(OpenSearchExecutionEngine.java:200) ~[?:?] + ... 15 more +Caused by: org.opensearch.sql.exception.ExpressionEvaluationException: timestamp:12:00 in unsupported format, please use 'yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]' + at org.opensearch.sql.data.model.ExprTimestampValue.(ExprTimestampValue.java:43) ~[?:?] + at org.opensearch.sql.calcite.utils.datetime.DateTimeConversionUtils.convertToTimestampValue(DateTimeConversionUtils.java:62) ~[?:?] + at org.opensearch.sql.expression.function.udf.datetime.TimestampFunction.timestamp(TimestampFunction.java:75) ~[?:?] + at Baz$1$1.moveNext(Unknown Source) ~[?:?] + at org.apache.calcite.linq4j.Linq4j$EnumeratorIterator.(Linq4j.java:666) ~[?:?] + at org.apache.calcite.linq4j.Linq4j.enumeratorIterator(Linq4j.java:99) ~[?:?] + at org.apache.calcite.linq4j.AbstractEnumerable.iterator(AbstractEnumerable.java:33) ~[?:?] + at org.apache.calcite.avatica.MetaImpl.createCursor(MetaImpl.java:83) ~[?:?] + at org.apache.calcite.avatica.AvaticaResultSet.execute(AvaticaResultSet.java:186) ~[?:?] + at org.apache.calcite.jdbc.CalciteResultSet.execute(CalciteResultSet.java:64) ~[?:?] + at org.apache.calcite.jdbc.CalciteResultSet.execute(CalciteResultSet.java:43) ~[?:?] + at org.apache.calcite.avatica.AvaticaConnection.executeQueryInternal(AvaticaConnection.java:575) ~[?:?] 
+ at org.apache.calcite.avatica.AvaticaPreparedStatement.executeQuery(AvaticaPreparedStatement.java:137) ~[?:?] + at org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.lambda$execute$6(OpenSearchExecutionEngine.java:200) ~[?:?] + ... 15 more +``` +
    + + +### @yuancu + + +> Not related to this issue, I found there are some inefficient implementation for the date related functions, like branch selection at runtime for `TIMESTAMP` which should be determined in compiling phase based on the type of args, and redundant transformation between String and ExprStringValue for `DATE`. I don't check all of them. + +I spotted a few for functions under `java/org/opensearch/sql/expression/function/udf/datetime`. I can fix them in another new PR. + +Besides, almost all datetime functions convert inputs to `ExprValue`s to reuse v2's implementation. I can eliminate them to enhance efficiency if the anticipated improvement is significant. + + +### @yuancu + + +Should this PR be backported? @LantaoJin + + +### @LantaoJin + + +> Should this PR be backported? @LantaoJin + +Yes, please. + + +--- + +# PR #3829: Support struct field with dynamic disabled + +**URL:** https://github.com/opensearch-project/sql/pull/3829 + +**Author:** @qianheng-aws + +**Created:** 2025-06-30T07:29:22Z + +**State:** MERGED + +**Merged:** 2025-07-09T16:20:57Z + +**Changes:** +101 -2 (6 files) + +**Labels:** `bug`, `backport 2.x`, `backport 2.19-dev` + + +## Description + +### Description +Support struct field with dynamic disabled + +If dynamic mapping disabled, although information of type missed for the new added fields, we can still parse value from the content by itself only. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3343 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java:196` + + +can we add some comment to explain why we parse context when field type is not present? + + +## General Comments + + +### @qianheng-aws + + +Another issue is that we don't serialize json whose value is null. + +For example: +``` +POST {{baseUrl}}/_bulk/ +Content-Type: application/x-ndjson + +{"index": {"_index": "test001"}} +{"profile": {"age": 1}} +{"index": {"_index": "test001"}} +{"profile": {"address": null}} +``` + +We will currently get result for PPL `source=test001`: +``` +{ + "schema": [ + { + "name": "profile", + "type": "struct" + } + ], + "datarows": [ + [ + { + "age": 1 + } + ], + [ + {} + ] + ], + "total": 2, + "size": 2 +} +``` + +But will get this if configuring `serializeNulls` for GSON: +``` +{ + "schema": [ + { + "name": "profile", + "type": "struct" + } + ], + "datarows": [ + [ + { + "age": 1 + } + ], + [ + { + "address": null + } + ] + ], + "total": 2, + "size": 2 +} +``` + +Which result do we want to generate? I would prefer the latter one. Shall we fix this as well? @penghuo + + +### @LantaoJin + + ++1 for serializeNulls + + +### @LantaoJin + + +Ping @penghuo @dai-chen @Swiddis + + +--- + +# PR #3826: Pass JOIN_TIME_OUT value to keepalive + +**URL:** https://github.com/opensearch-project/sql/pull/3826 + +**Author:** @ahkcs + +**Created:** 2025-06-26T22:43:27Z + +**State:** MERGED + +**Merged:** 2025-08-07T16:38:21Z + +**Changes:** +914 -14 (9 files) + +**Labels:** `bug` + + +## Description + +## Description +Pass JOIN_TIME_OUT value to keepalive +issue link + +https://github.com/opensearch-project/sql/issues/3820 + + +## Result + +When running this: + +``` +curl -X POST "localhost:9200/_plugins/_sql" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "SELECT /*! 
JOIN_TIME_OUT(120) */ l.id, l.name, r.value FROM test_left AS l LEFT JOIN test_right AS r ON l.epoch = r.epoch LIMIT 5" + }' +``` + +``` +Request SQLQueryRequest(jsonContent={"query":"SELECT /*! JOIN_TIME_OUT(120) */ l.id, l.name, r.value FROM test_left AS l LEFT JOIN test_right AS r ON l.epoch = r.epoch LIMIT 5"}, query=SELECT /*! JOIN_TIME_OUT(120) */ l.id, l.name, r.value FROM test_left AS l LEFT JOIN test_right AS r ON l.epoch = r.epoch LIMIT 5, path=/_plugins/_sql, format=jdbc, params={}, sanitize=true, pretty=false, cursor=Optional.empty) is not supported and falling back to old SQL engine +[2025-07-01T11:42:37,617][INFO ][o.o.s.l.p.RestSqlAction ] [integTest-0] Request Query: ( SELECT /*! JOIN_TIME_OUT(12number) */ identifier, identifier, identifier FROM table l LEFT JOIN table r ON identifier = identifier LIMIT number ) +[2025-07-01T11:42:37,649][INFO ][o.o.s.l.q.p.HashJoinQueryPlanRequestBuilder] [integTest-0] HashJoinQueryPlanRequestBuilder: Found JOIN_TIME_OUT hint: 120 seconds, configuring PIT keepalive +[2025-07-01T11:42:37,650][INFO ][o.o.s.l.q.p.c.Config ] [integTest-0] Config: Set custom PIT keepalive to: 2m (120000ms) +[2025-07-01T11:42:37,657][INFO ][o.o.s.l.e.j.QueryPlanElasticExecutor] [integTest-0] QueryPlanElasticExecutor: Using custom PIT keepalive from JOIN_TIME_OUT hint: 120 seconds +[2025-07-01T11:42:37,666][INFO ][o.o.s.l.q.p.p.n.p.PointInTime] [integTest-0] PointInTime: Creating PIT with JOIN_TIME_OUT hint support: 120 seconds +[2025-07-01T11:42:37,666][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Using custom PIT keepalive from config: 2m (120000ms) +[2025-07-01T11:42:37,666][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Creating PIT with keepalive: 2m (120000ms) +[2025-07-01T11:42:37,680][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Created Point In Time s9y3QQEJdGVzdF9sZWZ0 with keepalive 2m successfully. 
+[2025-07-01T11:42:37,680][INFO ][o.o.s.l.q.p.p.n.p.PointInTime] [integTest-0] Loading first batch of response using Point In Time +[2025-07-01T11:42:37,728][INFO ][o.o.s.l.q.p.p.n.p.PointInTime] [integTest-0] Loading next batch of response using Point In Time. - s9y3QQEJdGVzdF9sZWZ0 +[2025-07-01T11:42:37,733][INFO ][o.o.s.l.q.p.p.n.p.PointInTime] [integTest-0] PointInTime: Creating PIT with JOIN_TIME_OUT hint support: 120 seconds +[2025-07-01T11:42:37,733][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Using custom PIT keepalive from config: 2m (120000ms) +[2025-07-01T11:42:37,733][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Creating PIT with keepalive: 2m (120000ms) +[2025-07-01T11:42:37,734][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Created Point In Time s9y3QQEKdGVzdF9yaWdo with keepalive 2m successfully. +[2025-07-01T11:42:37,734][INFO ][o.o.s.l.q.p.p.n.p.PointInTime] [integTest-0] Loading first batch of response using Point In Time +[2025-07-01T11:42:37,736][INFO ][o.o.s.l.q.p.p.n.p.PointInTime] [integTest-0] Loading next batch of response using Point In Time. - s9y3QQEKdGVzdF9yaWdo +[2025-07-01T11:42:37,742][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Delete Point In Time s9y3QQEJdGVzdF9sZWZ0 status: 200 +[2025-07-01T11:42:37,742][INFO ][o.o.s.l.p.PointInTimeHandlerImpl] [integTest-0] Delete Point In Time s9y3QQEKdGVzdF9yaWdo status: 200 + + + +``` + + + +## Reviews + + +### @Swiddis - CHANGES_REQUESTED + + +Misclicked approve 😔 + + +### @RyanL1997 - COMMENTED + + +Left some minor comments on this + + +### @dai-chen - COMMENTED + + +Thanks for the changes! Just one last comment is UT / IT are both missing? If so, I think we should add both if possible. + + +### @noCharger - COMMENTED + + +Please remove internal link in PR description. + + +## Review Comments + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +suggestion: `LOG.debug`, no? 
+ +Although in this case [I'd use trace](https://stackoverflow.com/a/64806781/7543069) + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +todo (security): don't log the query directly, might contain confidential info. + + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +question: When does this ever happen? Can we do any better than catching `Exception`? + + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +Same as above, when can this ever fail? And can we be more specific in our exceptions? + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java:None` + + +suggestion: For type safety and implementation clarity, I think this should be an `Optional`. + + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java:86` + + +issue: Since the PIT ID is long, this is going to flood the logs for other oncalls. + +At least we should make this `debug`, I wonder if we could go further and truncate the PIT id? + + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java:None` + + +nit: rest of the team doesn't like wildcard imports, this was probably done automatically by the IDE. [You can disable it.] + + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +thought: since `run` is already a long function, we should probably extract the PIT construction to its own method for clarity. + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +suggestion: duplicate of the above logic, we should break this into a function. 
+ + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +changed to debug + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +deleted + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +Added throw + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +Added throw + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java:86` + + +truncated + + +### @ahkcs on `opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java:None` + + +fixed + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +Fixed + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +extracted + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java:None` + + +Changed + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +Log-rethrow is usually a bad pattern, obfuscates the original error and pollutes logs. + +The rule of thumb I learned is "log the error where you handle it," since the outermost scope has the most context to make a useful error message, and you don't end up with a two-page stack trace filled with "Caused-by:". Try documenting the error contract of the function in a javadoc: what exceptions can it throw and what do they mean? Then in the callers of the function, review if it makes sense to handle the error there, or if it should propagate further up. 
(For best results, it may be best to define a custom exception type) + +Eventually something should know how to handle the issue and either a) stop it from propagating to the user or b) format it as a human-readable user error that explains the problem and how to fix it. + + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +removed Log-rethrow + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +we should remove these comments + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +same here + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +nit: remove these emojis since they may cause display problems in some terminals + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +removed + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +removed + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java:None` + + +done + + +### @RyanL1997 on `legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java:None` + + +why did we switch to debug here instead? + + +### @ahkcs on `legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java:None` + + +switched back + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java:None` + + +`Config` is already in HashJoinQueryPlanRequestBuilder right? Why do we need to do this? Is reflection really required? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/TableScan.java:None` + + +I see there is `hintLimit` in `TableInJoinRequestBuilder`. 
We can avoid all these changes by adding more hint values or `Config` to request builder? + + +## General Comments + + +### @Swiddis + + +`PointInTimeHandlerImplTest` is failing in CI (along with those datetime tests that have been flaky since forever) + + +### @ahkcs + + +> `PointInTimeHandlerImplTest` is failing in CI (along with those datetime tests that have been flaky since forever) + +Fixed + + +### @noCharger + + +> Thanks for the changes! Just one last comment is UT / IT are both missing? If so, I think we should add both if possible. + ++1 on test coverage + + +### @noCharger + + +Let's resolve the conflicts before merging + + +--- + +# PR #3824: [Backport 2.19-dev] [BUG] Fix flaky tests related to WeekOfYear in CalcitePPLDateTimeBuiltinFunctionIT + +**URL:** https://github.com/opensearch-project/sql/pull/3824 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-26T17:17:08Z + +**State:** MERGED + +**Merged:** 2025-07-01T03:00:36Z + +**Changes:** +40 -13 (1 files) + + +## Description + +Backport 30f250857331e9b7649ce8c8f1729a38d977410b from #3815. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +ping @qianheng-aws + + +--- + +# PR #3823: Support span push down + +**URL:** https://github.com/opensearch-project/sql/pull/3823 + +**Author:** @qianheng-aws + +**Created:** 2025-06-26T14:51:42Z + +**State:** MERGED + +**Merged:** 2025-07-01T14:22:43Z + +**Changes:** +253 -48 (21 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +Support span push down + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3384 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:190` + + +the case like `avg(a+1)`? do we have an issue to track? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:275` + + +Can we create a static method in PlanUtil for this logic? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json:4` + + +we need more test cases for: + +- span(field, integer) +- span(field, decimal) +- span(timestamp, integer unit) +- span(date, integer unit) + +The unit should contain at least "day, m, M, QUARTER" and "MS" (DSL unsupported) + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:190` + + +Yes, this one https://github.com/opensearch-project/sql/issues/3386 + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:275` + + +Just want to make use of the auto conversion in java21. If we extract these into a separate method, may need to do explicit conversion. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json:4` + + +- span(timestamp, integer unit) +- span(date, integer unit) +will both hit the same code coverage. 
+ +So as to `day, m, QUARTER, MS` + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json:4` + + +I've tested with `MS`, its supported in DSL + +``` +{ + "from": 0, + "size": 0, + "timeout": "1m", + "aggregations": { + "composite_buckets": { + "composite": { + "size": 1000, + "sources": [ + { + "span(birthdate,1ms)": { + "date_histogram": { + "field": "l_receiptdate", + "missing_bucket": true, + "missing_order": "first", + "order": "asc", + "fixed_interval": "1ms" + } + } + } + ] + }, + "aggregations": { + "count()": { + "value_count": { + "field": "_index" + } + } + } + } + } +} +``` + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json:4` + + +- span(field, decimal) + +Our SpanFunction doesn't support decimal currently, since linq4j divide will throw exception `Binary numeric promotion not possible on types "int" and "java.math.BigDecimal"`, and we implement it by using `field / interval * interval`. + +Maybe support it in another PR later. + + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:62` + + +Question: OpenSearchProjectIndexScanRule should push down project? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchAggregateIndexScanRule.java:62` + + +Yes, both rules will take effect. + +But because Calcite's optimizer is not doing transformation on one plan, but keep generating new plans, putting them in the RelSet, and it will choose the cheapest plan in the end, so OpenSearchProjectIndexScanRule pushing down project won't prevent this OpenSearchAggregateIndexScanRule. + +For example, let's say we have original plan `Agg - Project - Scan`, it will generate a new plan `Agg - Scan` by OpenSearchProjectIndexScanRule and a new plan `Scan` by OpenSearchAggregateIndexScanRule. 
So we have 3 plans in the RelSet [`Agg - Project - Scan`, `Agg - Scan`, `Scan`], and in the end the optimizer will choose `Scan` as the final plan since its cost is the cheapest. + + +## General Comments + + +### @qianheng-aws + + +IT failed because we have different behavior for span(“1961-04-12 09:10:00”) between push down and non push down. +- Push down: it’s rounded to “1961-04-12 09:00:00” +- Non push down: it’s rounded to “1961-04-12 10:00:00” because the timestamp is a negative value as it’s before 1970 + +v2 has the same problem since Calcite reuses the same logic as v2. Maybe we should fix this in another PR. For this PR, we will allow different results for this minor case and let IT pass. + + +--- + +# PR #3822: Add big5 to IT Suite + +**URL:** https://github.com/opensearch-project/sql/pull/3822 + +**Author:** @LantaoJin + +**Created:** 2025-06-26T00:05:00Z + +**State:** MERGED + +**Merged:** 2025-07-08T18:12:05Z + +**Changes:** +722 -11 (49 files) + +**Labels:** `enhancement`, `testing`, `backport 2.19-dev` + + +## Description + +### Description +Add big5 (https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/big5/queries) to PPL IT suite. +There is only one document in the big5 index. The target of this IT suite is displaying the end-to-end execution times (analyzing + optimizing + codegen + compiling). 
+It prints the end2end execution times, on my laptop is: +``` +Summary of PPLBig5IT: +range_field_conjunction_big_range_big_term_query: 94 ms +range: 13 ms +query_string_on_message: 38 ms +composite_date_histogram_daily: 58 ms +desc_sort_timestamp: 14 ms +desc_sort_with_after_timestamp: 5 ms +terms_significant_1: 11 ms +range_numeric: 6 ms +default: 6 ms +asc_sort_with_after_timestamp: 5 ms +sort_numeric_asc_with_match: 6 ms +sort_numeric_desc_with_match: 5 ms +composite_terms_keyword: 9 ms +sort_numeric_asc: 5 ms +keyword_terms_low_cardinality: 11 ms +range_field_conjunction_small_range_big_term_query: 5 ms +keyword_in_range: 6 ms +range_with_desc_sort: 5 ms +range_with_asc_sort: 6 ms +multi_terms_keyword: 7 ms +range_field_disjunction_big_range_small_term_query: 11 ms +query_string_on_message_filtered: 5 ms +term: 4 ms +sort_keyword_can_match_shortcut: 4 ms +range_field_conjunction_small_range_small_term_query: 6 ms +scroll: 5 ms +terms_significant_2: 6 ms +keyword_terms: 4 ms +date_histogram_minute_agg: 7 ms +asc_sort_timestamp_can_match_shortcut: 4 ms +sort_numeric_desc: 4 ms +desc_sort_timestamp_no_can_match_shortcut: 5 ms +composite_terms: 6 ms +sort_keyword_no_can_match_shortcut: 4 ms +asc_sort_timestamp: 4 ms +asc_sort_timestamp_no_can_match_shortcut: 4 ms +query_string_on_message_filtered_sorted_num: 6 ms +date_histogram_hourly_agg: 5 ms +desc_sort_timestamp_can_match_shortcut: 4 ms +Total 39 queries succeed. 
Average duration: 10 ms +``` +``` +Summary of CalcitePPLBig5IT: +asc_sort_timestamp_can_match_shortcut: 84 ms +composite_date_histogram_daily: 150 ms +date_histogram_hourly_agg: 50 ms +terms_significant_1: 37 ms +range_numeric: 25 ms +asc_sort_timestamp: 17 ms +multi_terms_keyword: 38 ms +desc_sort_with_after_timestamp: 12 ms +range_field_conjunction_small_range_small_term_query: 40 ms +sort_numeric_desc_with_match: 17 ms +desc_sort_timestamp: 11 ms +composite_terms_keyword: 30 ms +query_string_on_message: 34 ms +range_with_asc_sort: 21 ms +scroll: 11 ms +query_string_on_message_filtered_sorted_num: 36 ms +range_field_disjunction_big_range_small_term_query: 29 ms +keyword_in_range: 38 ms +default: 9 ms +asc_sort_with_after_timestamp: 16 ms +asc_sort_timestamp_no_can_match_shortcut: 21 ms +range_field_conjunction_big_range_big_term_query: 22 ms +sort_numeric_asc: 12 ms +sort_numeric_asc_with_match: 11 ms +keyword_terms_low_cardinality: 21 ms +date_histogram_minute_agg: 25 ms +query_string_on_message_filtered: 23 ms +terms_significant_2: 34 ms +keyword_terms: 14 ms +term: 8 ms +range_field_conjunction_small_range_big_term_query: 17 ms +composite_terms: 25 ms +range_with_desc_sort: 19 ms +sort_numeric_desc: 10 ms +desc_sort_timestamp_can_match_shortcut: 11 ms +sort_keyword_can_match_shortcut: 12 ms +range: 17 ms +sort_keyword_no_can_match_shortcut: 11 ms +desc_sort_timestamp_no_can_match_shortcut: 10 ms +Total 39 queries succeed. Average duration: 26 ms +``` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @dai-chen - COMMENTED + + +> The target of this IT suite is displaying the end-to-end execution times + +I think this is benchmark instead of IT right? If so, we may put all these into `benchmarks` module? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +> > The target of this IT suite is displaying the end-to-end execution times +> +> I think this is benchmark instead of IT right? If so, we may put all these into `benchmarks` module? + +Unlike jmh micro-benchmark, Big5 Suite requires a remote cluster to be launched, so it located in integ-test module, or we have to move the `integ-test` configs to `benchmarks` module. + + +--- + +# PR #3821: Add compare_ip operator udfs + +**URL:** https://github.com/opensearch-project/sql/pull/3821 + +**Author:** @ishaoxy + +**Created:** 2025-06-25T10:08:00Z + +**State:** MERGED + +**Merged:** 2025-07-11T09:27:09Z + +**Changes:** +344 -87 (9 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description + +- Introduced a new UDF `CompareIpFunction` for IP address comparisons. +- Since Calcite does not provide a native SQL type for IP addresses, EXPR_IP is mapped to `SqlTypeName.OTHER`. +- Enhanced the `registerOperator` method to support registering one or multiple operators under a single function name, enabling function overloading based on operand types. +- Registered the newly implemented IP comparison operators under the existing comparison function names(e.g., =, !=, <, >, etc.). +- Added new and specific type checkers for IP comparison and CIDR functions to ensure accurate operand validation and operator resolution. + + + +### Related Issues +Resolves #3776 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @qianheng-aws - COMMENTED + + +Please add IT for ip comparison. + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +I think (STRING, STRING) should not be compared as IP? + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +lowercase `equals` may be more appropriate. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +I'm thinking whether there is a better way to implement the comparing classes without the need to declare 6 classes doing virtually the same thing. + +E.g. + +```java +class CompareIPFunction { + private BiPredicate biPredicate; + + private CompareIPFunction(BiPredicate biPredicate){ + this.biPredicate = biPredicate; + } + + public static CompareIPFunction less() { + return new CompareIPFunction((a, b) -> a < b); + } + +... + + public boolean compare(IPAddress ip1, IPAddress ip2){ + return biPredicate.test(IPUtils.compare(ip1, ip2), 0) + } + +... + +} +``` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java:None` + + +The project avoids using `import *`. You can configure the IDE to not convert multiple imports to `import *`. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java:None` + + +Maybe we should use new `PPLTypeChecker` here and override its `checkOperandTypes` method to only allow accept `string` or `ip`. 
+ + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:None` + + +If we stop using `CompositeOperandTypeChecker` and create a new TypeChecker ourselves, maybe we can avoid using `SqlTypeFamily.NULL` to represent EXPR_IP and check operands on RelDataType level. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:None` + + +@qianheng-aws How about this implementation https://github.com/ishaoxy/sql/pull/1 +It doesn't use NULL for IP, but created two other classes for IP type checking. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java:None` + + +It just came to me that there's not only compare that checks IP type. CIDR_MATCH also has to validate IP types. If we make a specific IP type checker for compare operators, we may also have to create one for each other functions like cidrmatch and geoip. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Question: do we need to support IP as UDT in PPL engine? + + +In OpenSearch PPL, IP handling depends on *index field type*. CIDR-based filtering should use ip_range queries for ip fields, script pushdown / in-memory processing for keyword, text, and runtime string fields. + +Field Type | Use Case | Expectation +-- | -- | -- +IP Field | search index=log ip="192.168.0.0/16" | Rewrite as term query +  | search index \| where cidrmatch("192.168.0.0/16", ip) | Rewrite as term query +Keyword Field | search index=log ip="192.168.0.0/16" | Rewrite as term query, exact keyword match +  | search index \| where cidrmatch("192.168.0.0/16", ip) | Script pushdown — ip field is a string, not rewrite as term query. 
+Text Field | search index=log ip="192.168.0.0/16" | Rewrite as query_string query, full text search +  | search index \| where cidrmatch("192.168.0.0/16", ip) | Script pushdown — ip field is a string, not rewrite as term query. +Runtime Field | search index=log \| parse ip=regex(...)\| where ip="192.168.0.0/16" | Script pushdown — ip field is a string, it is a string comparison query +  | search index=log \| parse ip=regex(...)\| where cidrmatch("192.168.0.0/16", ip) | Script pushdown — ip field is a string, not rewrite as term query. + + + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +Typing error, fixed. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +Thanks for reminding me this rule. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java:None` + + +Got it. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +Handled. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/EqualsIpFunction.java:None` + + +Tweaked the code a little bit: https://github.com/ishaoxy/sql/pull/2 + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +`SqlStdOperatorTable.EQUALS` is a very basic std operator which is widely used in Calcite internals, I am worried that it may introduce potential bugs and performance regression for pushdown. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +But Calcite's built-in operators like `SqlStdOperatorTable.EQUALS` does not handle our UDT like IP. These new operators are only effective to IP comparison, controlled via type checkers. Comparison between other types will still falls to Calcite's built-in comparison operators. 
+ +There were two solutions: +- one is to convert IP UDT to a type that is comparable by calcite's comparators. +- another is to add new operators exclusively for IP comparison + +We opted for the latter. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +In order to keep the comparison logic the same as v2, I chose to add these specific operator udfs for IP comparison instead of converting it to string type that calcite can compare. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +> These new operators are only effective to IP comparison, controlled via type checkers. Comparison between other types will still falls to Calcite's built-in comparison operators. + +Can we move above logic to a specific method `registerOverrideOperator`, it quite confused me. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +Or modify to +``` +registerOperator(EQUAL, check(PPLBuiltinOperators.EQUALS_IP,SqlStdOperatorTable.EQUALS)); +``` +and leverage `check()` to manage the specific register logic via typeChecker. + +`check()` can name to `case()` or `set()` or `queue()`... + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:None` + + +> Or modify to +> +> ``` +> registerOperator(EQUAL, check(PPLBuiltinOperators.EQUALS_IP,SqlStdOperatorTable.EQUALS)); +> ``` +> +> and leverage `check()` to manage the specific register logic via typeChecker. +> +> `check()` can name to `case()` or `set()` or `queue()`... + +Modified, improving codes readability. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java:None` + + +Handled. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:126` + + +please update the PR description to align changes. 
+ + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:499` + + +please add a javadoc for this method to explain what are the operators for + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:499` + + +Added. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java:126` + + +Thanks for reminding me. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java:None` + + +For efficiency consideration, we'd better move this switch case to the `implement` method + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java:None` + + +Why not leverage `ExprIpValue::compare`? Then we don't need to do extra transformation for `ExprIPValue` + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java:None` + + +Sorry for only focusing on IPUtils before and ignoring the functions encapsulated in ExprIpValue. Now fixed. + + +### @ishaoxy on `core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java:None` + + +Fixed. + + +## General Comments + + +### @qianheng-aws + + +I think we should implement `implicit conversion` before implementing comparison for non-primitive types. + +After that, for Ip comparison, we can convert string to ExprIP. And then we can implement some generic comparison functions for types of comparable, via calling their `compareTo` method. + +But current approach is still acceptable if we won't have other UDT in the future. + + +### @ishaoxy + + + + + +> Please add IT for ip comparison. + +Handled. + + +### @qianheng-aws + + +> @qianheng-aws How about this implementation https://github.com/ishaoxy/sql/pull/1 +It doesn't use NULL for IP, but created two other classes for IP type checking. + +Yes, I think that's what we need. 
In the future when we have type conversion, that IPTypeChecker should only accept type of IP, and type conversion should convert `String` to `ExprIpType` + +> It just came to me that there's not only compare that checks IP type. CIDR_MATCH also has to validate IP types. If we make a specific IP type checker for compare operators, we may also have to create one for each other functions like cidrmatch and geoip. + +Yes since we treat `ExprIPType` as a specific type instead of `String` + +@yuancu + + +### @LantaoJin + + +@ishaoxy please follow the instructions to backport manually + + +--- + +# PR #3819: [AUTO] Increment version to 3.2.0-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/3819 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-24T23:09:41Z + +**State:** MERGED + +**Merged:** 2025-08-06T01:26:04Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `v3.2.0`, `skip-changelog` + + +## Description + +- Incremented version to **3.2.0-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @brianf-aws + + +ML Commons 3.2 version bump has been merged + +https://github.com/opensearch-project/ml-commons/pull/3942 + + +--- + +# PR #3818: Shift some 3.1 release notes categories + +**URL:** https://github.com/opensearch-project/sql/pull/3818 + +**Author:** @Swiddis + +**Created:** 2025-06-24T22:23:43Z + +**State:** MERGED + +**Merged:** 2025-09-29T21:33:13Z + +**Changes:** +1 -3 (1 files) + +**Labels:** `skip-changelog` + + +## Description + +### Description +Breaking changes turn out to be forbidden for minor releases. I see why they're technically labeled as breaking, but it doesn't really match the spirit of what the release team means by it. 
¯\\\_(ツ)\_/¯ + +To prevent similar release blockers down the road, I [updated my release note generator script to warn about this](https://github.com/Swiddis/opensearch-utils/commit/778778bd4dde6972648664d7e68817abb507ae8a). + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3817: [Backport] Unified query modules publishing to 2.19 + +**URL:** https://github.com/opensearch-project/sql/pull/3817 + +**Author:** @dai-chen + +**Created:** 2025-06-24T18:55:44Z + +**State:** MERGED + +**Merged:** 2025-06-25T16:15:48Z + +**Changes:** +640 -0 (7 files) + +**Labels:** `enhancement` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR backports the unified query API and related module publishing commits in `feature/unified-ppl` to PPL Calcite in 2.19 release line. This backport is a prerequisite for publishing the corresponding changes in the OpenSearch-Spark repository. + +Instead of backporting directly to the final destination `2.19-dev` branch, we are targeting a temporary intermediate branch `feature/unified-ppl-2.19-dev` for the following reasons: + +1. There are ongoing backports currently being merged into `2.19-dev`. +2. Further changes in `unified-query-api` may occur as development continues on the Spark side. 
+ +#### Local Test + +``` +$ java -version +openjdk version "11.0.25" 2024-10-15 LTS +OpenJDK Runtime Environment Corretto-11.0.25.9.1 (build 11.0.25+9-LTS) +OpenJDK 64-Bit Server VM Corretto-11.0.25.9.1 (build 11.0.25+9-LTS, mixed mode) + +$ ./gradlew clean publishUnifiedQueryPublicationToMavenLocal + +➜ pwd +/Users/daichen/.m2/repository/org/opensearch/query +➜ tree -L 2 +. +├── unified-query-api +│   └── 2.19.0.0-SNAPSHOT +├── unified-query-common +│   └── 2.19.0.0-SNAPSHOT +├── unified-query-core +│   └── 2.19.0.0-SNAPSHOT +├── unified-query-opensearch +│   └── 2.19.0.0-SNAPSHOT +├── unified-query-ppl +│   └── 2.19.0.0-SNAPSHOT +├── unified-query-protocol +│   └── 2.19.0.0-SNAPSHOT +└── unified-query-sql + └── 2.19.0.0-SNAPSHOT +``` + +#### Next Steps + +Once both the `2.19-dev` and Spark-side changes have stabilized, we will submit the final PRs to: + +1. Migrate to new Maven repo endpoint: https://github.com/opensearch-project/opensearch-build/issues/5551 +2. Merge the `feature/unified-ppl` into `main` branch. +3. Backport all necessary commits to `2.19-dev` branch. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/3734 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3816: Add enforce-labels action + +**URL:** https://github.com/opensearch-project/sql/pull/3816 + +**Author:** @Swiddis + +**Created:** 2025-06-24T16:53:40Z + +**State:** MERGED + +**Merged:** 2025-06-26T17:23:16Z + +**Changes:** +13 -0 (1 files) + +**Labels:** `maintenance` + + +## Description + +### Description +In the [3.1 release notes](https://github.com/opensearch-project/sql/blob/main/release-notes/opensearch-sql.release-notes-3.1.0.0.md), there were ~20 PRs that didn't have release labels, so I had to manually figure out which categories they go into. I [automated the release note generation process](https://github.com/Swiddis/opensearch-utils/tree/main/release) 2 years ago on my old team, but the script requires the labels in order to be fully automated. This PR introduces an action for checking these labels exist. + +This same action has been used with success on dashboards-observability for ~2y. + +### Related Issues +Humans shouldn't need to manually create lists of the PRs in their release. (If we had annotated release notes that would be a different story, but org-wide we don't really bother to annotate the notes.) + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +Thanks! + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @Swiddis + + +No backport since past releases are already passed. 
+ +Once this is merged I'll go through and tag everything merged between the 3.1 release and now. + + +### @Swiddis + + +Tagged all the merged PRs for 3.2, automation should work now + + +--- + +# PR #3815: [BUG] Fix flaky tests related to WeekOfYear in CalcitePPLDateTimeBuiltinFunctionIT + +**URL:** https://github.com/opensearch-project/sql/pull/3815 + +**Author:** @yuancu + +**Created:** 2025-06-24T08:31:20Z + +**State:** MERGED + +**Merged:** 2025-06-26T17:16:54Z + +**Changes:** +40 -13 (1 files) + +**Labels:** `flaky-test`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +This fixes flaky tests related to `YEARWEEK` function in certain locales. + +This fix assumes that the existing implementation is correct, and only fixes tests. + +The problem arises from https://github.com/opensearch-project/sql/blob/b610ce96f5a7772d7ebf15f0bea27d1103c63c92/core/src/main/java/org/opensearch/sql/expression/datetime/CalendarLookup.java#L62 + +When locale is in `ja-JP-u-ca-japanese-x-lvariant-JP`, the calendar implementation is `JapanImperialCalender`. Retrieving week of the year with this calendar on 1984-04-12 results in 14; while in `en-US` locale it returns 15. + +### Related Issues +Resolves #3814 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3813: [Backport 2.19-dev] Decimal literal should convert to double in pushdown + +**URL:** https://github.com/opensearch-project/sql/pull/3813 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-23T15:33:11Z + +**State:** MERGED + +**Merged:** 2025-06-25T00:10:48Z + +**Changes:** +54 -3 (2 files) + + +## Description + +Backport e32c945e40be5bdddcb5a227cd170e20d77b7885 from #3811. + + + +## Reviews + + +### @LantaoJin - APPROVED + + +CI contains flaky tests which are not related. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3811: Decimal literal should convert to double in pushdown + +**URL:** https://github.com/opensearch-project/sql/pull/3811 + +**Author:** @LantaoJin + +**Created:** 2025-06-23T10:29:34Z + +**State:** MERGED + +**Merged:** 2025-06-23T15:32:55Z + +**Changes:** +54 -3 (2 files) + +**Labels:** `enhancement`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +https://github.com/opensearch-project/sql/pull/3673 introduced a decimal literal in plan, decimal literal in expression can not be pushed down since the decimal literal in Calcite is not converted to double in pushdown, and only double/float data type supported in DSL. This PR converts decimal type to double type in handling pushdown. + +Note, this bug is only triggered when the field type in expression is integer + decimal literal. +For example, following query failed when `balance` is integer type in mapping, but success if `balance` is double. 
+``` +source=%s | where balance > 40000.00 | stats avg(balance) +``` + +### Related Issues +Resolves #3810 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3806: Update the maven snapshot publish endpoint and credential + +**URL:** https://github.com/opensearch-project/sql/pull/3806 + +**Author:** @zelinh + +**Created:** 2025-06-20T19:38:54Z + +**State:** MERGED + +**Merged:** 2025-08-05T03:10:17Z + +**Changes:** +5 -2 (3 files) + +**Labels:** `infrastructure`, `backport 2.19`, `backport 2.19-dev` + + +## Description + +### Description +Update the Maven Snapshots publish URL in accordance with the recent Sonatype migration. +https://central.sonatype.org/publish/publish-portal-snapshots/ + +We have stored the `onepassword` token in this repo secrets and new credentials for Sonatypes username & password have been stored in `onepassword`. These credentials will be exported as env variables which used by maven publish. + +### Related Issues +Part of a campaign issue https://github.com/opensearch-project/opensearch-build/issues/5551 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @zelinh + + +Not sure some of logics here in sql repo. Might need some help. + +According to Nexus Sonatype, https://central.sonatype.org/publish/publish-portal-snapshots/ +For publish and consumption, we would use endpoint `https://central.sonatype.com/repository/maven-snapshots/` +But for browsing or downloading from script, we may need to use https://central.sonatype.com/service/rest/repository/browse/maven-snapshots + +Also for `publish-snapshots-and-grammar-files.yml` workflow, I see some `curl` commands trying to push artifacts. We may also need to verify the backward compatibility of the new endpoint or onboard with a new way. + + +### @gaiksaya + + +Hi maintainers, +Please add the correct label to backport to active branches. +_Would recommend backporting this PR to 2.19 for 2.19.x artifacts for future releases._ + +Thanks! + + + +--- + +# PR #3805: [Backport main] Remove aviator dep + +**URL:** https://github.com/opensearch-project/sql/pull/3805 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-20T16:41:51Z + +**State:** MERGED + +**Merged:** 2025-06-20T18:03:19Z + +**Changes:** +0 -1 (1 files) + +**Labels:** `maintenance` + + +## Description + +Backport 98c0c21e4247361e1cbd10706a174ce9aba7a042 from #3804. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3804: Remove aviator dep + +**URL:** https://github.com/opensearch-project/sql/pull/3804 + +**Author:** @Swiddis + +**Created:** 2025-06-20T16:13:18Z + +**State:** MERGED + +**Merged:** 2025-06-20T16:41:34Z + +**Changes:** +0 -1 (1 files) + +**Labels:** `backport main`, `maintenance` + + +## Description + +### Description +Removes another unused dep which is causing CVE tooling to complain. Since the CVE is rated 9.8, policy won't allow an exemption here. + +We should really do a full unused dependency audit at some point, seems like we have a lot of these. + +### Related Issues +3.1 release blocker. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3803: Support join field list and join options + +**URL:** https://github.com/opensearch-project/sql/pull/3803 + +**Author:** @LantaoJin + +**Created:** 2025-06-20T03:30:11Z + +**State:** MERGED + +**Merged:** 2025-09-10T02:28:11Z + +**Changes:** +1408 -328 (31 files) + +**Labels:** `enhancement`, `PPL`, `backport-manually`, `backport-failed`, `calcite`, `backport 2.19-dev`, `v3.3.0` + +**Assignees:** @LantaoJin + + +## Description + +### Description +Support join field list and join options, see details in #3775 + +Example 1: Two indices join (3.0.0) +=========================== + +``` + PPL> source = state_country + | inner join left=a right=b ON a.name = b.name occupation + | stats avg(salary) by span(age, 10) as age_span, b.country; + + fetched rows / total rows = 5/5 + +-------------+----------+-----------+ + | avg(salary) | age_span | b.country | + |-------------+----------+-----------| + | 120000.0 | 40 | USA | + | 105000.0 | 20 | Canada | + | 0.0 | 40 | Canada | + | 70000.0 | 30 | USA | + | 100000.0 | 70 | England | + +-------------+----------+-----------+ +``` +Example 2: Join with options (3.3.0) +============================ +``` + PPL> source = state_country + | join type=inner overwrite=false max=1 name occupation + | stats avg(salary) by span(age, 10) as age_span, country; + + fetched rows / total rows = 5/5 + +-------------+----------+---------+ + | avg(salary) | age_span | country | + |-------------+----------+---------| + | 120000.0 | 40 | USA | + | 100000.0 | 70 | USA | + | 105000.0 | 20 | Canada | + | 70000.0 | 30 | USA | + +-------------+----------+---------+ +``` +Example 3: Join with subsearch (3.0.0) +============================== +``` + PPL> source = state_country as a + | where country = 'USA' OR country = 'England' + | left join ON 
a.name = b.name [ + source = occupation + | where salary > 0 + | fields name, country, salary + | sort salary + | head 3 + ] as b + | stats avg(salary) by span(age, 10) as age_span, b.country; + + fetched rows / total rows = 5/5 + +-------------+----------+-----------+ + | avg(salary) | age_span | b.country | + |-------------+----------+-----------| + | null | 40 | null | + | 70000.0 | 30 | USA | + | 100000.0 | 70 | England | + +-------------+----------+-----------+ +``` +Example 4: Join with field list with subsearch (3.3.0) +=============================== +``` + PPL> source = state_country + | where country = 'USA' OR country = 'England' + | join type=left overwrite=true name [ + source = occupation + | where salary > 0 + | fields name, country, salary + | sort salary + | head 3 + ] + | stats avg(salary) by span(age, 10) as age_span, country; + + fetched rows / total rows = 5/5 + +-------------+----------+-----------+ + | avg(salary) | age_span | country | + |-------------+----------+-----------| + | null | 40 | null | + | 70000.0 | 30 | USA | + | 100000.0 | 70 | England | + +-------------+----------+-----------+ +``` + + +### Related Issues +Resolves #3775 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `common/src/main/java/org/opensearch/sql/common/setting/Settings.java:None` + + +any other options? such as `plugins.ppl.splunk_compatible.enabled` to highlight `splunk` instead of `spl` + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +may catch up 3.1.0? 
+ + +### @penghuo on `docs/user/ppl/admin/settings.rst:None` + + +What is the expected behavior of a PPL command/function if it is not compatible with Splunk grammar? + + +### @penghuo on `docs/user/ppl/cmd/join.rst:None` + + +we could limit to `inner | outer | left`, then add other join types with perf-test gradually. + + +### @penghuo on `docs/user/ppl/cmd/join.rst:None` + + +change `` to ` | ` + + +### @penghuo on `docs/user/ppl/cmd/join.rst:None` + + + is optional, change to [] + + +### @penghuo on `docs/user/ppl/cmd/join.rst:None` + + +Polish join-field-list description, + +The fields used to build the join criteria. The join field list must exist on both sides. If no join field list is specified, all fields common to both sides will be used as join keys. + + +### @penghuo on `docs/user/ppl/cmd/join.rst:None` + + +add `[left=] [right=] where ...` in syntax? + + +### @penghuo on `docs/user/ppl/cmd/join.rst:None` + + +it is expected behavior, not limitation, right? + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +why not support alias? `left=l right=r` + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +allow options to appear before or after fieldList, e.g. `... | join id overwrite=true [search index=product]` + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +in spl mode, `... | inner join xxx` will throw a syntax exception, in which case user will see this error message, + + +### @penghuo on `ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java:None` + + +JoinType is not a valid token in SPL mode. The AST builder should be aware of SPL mode and handle it separately. + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +We should expose only one syntax at a time? The concern with enabling both PPL and SPL-compatible syntax is +* Hard to document and maintain, tooling (like syntax highlighting, query editors) must also be mode-aware. 
+* Increases complexity of semantic validation and testing. + + +### @LantaoJin on `docs/user/ppl/admin/settings.rst:None` + + +Current implementation is `SPL-compatible`, not `SPL-only`. +For example: +When compatible config is true, user can run both `rex` and `parse` command. When compatible config is false, only `parse` command works. + +If we want `SPL-only`, when `SPL-only` config is true, `parse` command won't work any more. Which one do you prefer? + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +Em, I think we can add a config to enable other join type, and disable them by default, instead of removing from code. + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +> add `[left=] [right=] where ...` in syntax? + +this is spl2 syntax, I don't think we need to support it in compatible mode. + +`[left=] [right=] on ...` is listed in default PPL syntax + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +> why not support alias? `left=l right=r` + +in `sideAlias` + +> allow options to appear before or after fieldList + +Before only. + + +### @LantaoJin on `ppl/src/main/antlr/OpenSearchPPLParser.g4:None` + + +The current design is when the config is set to true, both ppl and spl syntax are supported. For your concerns, let me check the challenge in implementation level next week. I don’t think the document has any blocker. In future we can add a legacy config to disable the current ppl syntax as long as the implementation level has no blocker. For join, the current ppl join is a mutation of join of spl2. We may still need to support since splunk can both support spl1 and spl2. For your second concern, as long as implementation part has no blocker, the validation should work well since validation is done by antlr4 itself. 
+ + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +fixed + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1029` + + +[question] Why not using 2 directly here? I think we should join the top 2 operators in the stack. And In which case will it be greater than 2? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1029` + + +this method invoked after join condition resolving, if the joinCondition contains subsearch, the top of stack is the subsearch table. So I pick the `stackSize - 1` and `stackSize - 2` as left and right. + + +### @yuancu on `docs/user/admin/settings.rst:786` + + +Should the new option `plugins.calcite.all_join_types.allowed` be explained here? + + + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:75` + + +I found that it has problem running this query in DSL: + +```json +{ + "error": { + "reason": "Error occurred in OpenSearch engine: all shards failed", + "details": "Shard[0]: SearchException[cannot use `collapse` in conjunction with `search_after`]\n\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", + "type": "SearchPhaseExecutionException" + }, + "status": 500 +} +``` + +log: + +``` +[DEBUG][o.o.a.s.TransportSearchAction] [7cf34de73b85] [1CSDb_IxTA6-MeU1m4iE6Q][opensearch-sql_test_index_bank][0]: Failed to execute [SearchRequest{searchType=QUERY_THEN_FETCH, indices=[opensearch-sql_test_index_bank], indicesOptions=IndicesOptions[ignore_unavailable=false, allow_no_indices=true, expand_wildcards_open=true, expand_wildcards_closed=false, expand_wildcards_hidden=false, allow_aliases_to_multiple_indices=true, forbid_closed_indices=true, ignore_aliases=false, ignore_throttled=true], routing='null', preference='null', requestCache=null, scroll=null, maxConcurrentShardRequests=0, batchedReduceSize=512, preFilterShardSize=null, 
allowPartialSearchResults=true, localClusterAlias=null, getOrCreateAbsoluteStartMillis=-1, ccsMinimizeRoundtrips=true, source={"from":0,"size":10000,"timeout":"1m","query":{"exists":{"field":"account_number","boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}],"search_after":[32],"collapse":{"field":"account_number"},"pit":{"id":"w6q4QQEeb3BlbnNlYXJjaC1zcWxfdGVzdF9pbmRleF9iYW5rFnpYV2lDeEQ2VDU2aTJXeW9lWVBsa0EAFjFDU0RiX0l4VEE2LU1lVTFtNGlFNlEAAAAAAAAAAC8WR0I1WXNOZzhTSm16X293NWxaVU5GdwEWelhXaUN4RDZUNTZpMld5b2VZUGxrQQAA"}}, cancelAfterTimeInterval=null, pipeline=null, phaseTook=false}] lastShard [true] +[DEBUG][o.o.a.s.TransportSearchAction] [7cf34de73b85] All shards failed for phase: [query] +org.opensearch.search.SearchException: cannot use `collapse` in conjunction with `search_after` + at org.opensearch.search.SearchService.parseSource(SearchService.java:1643) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.search.SearchService.createContext(SearchService.java:1207) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.search.SearchService.executeQueryPhase(SearchService.java:772) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.search.SearchService$2.lambda$onResponse$0(SearchService.java:738) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:74) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.action.ActionRunnable$2.doRun(ActionRunnable.java:89) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.threadpool.TaskAwareRunnable.doRun(TaskAwareRunnable.java:78) 
~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:59) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:975) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) ~[opensearch-3.2.0-SNAPSHOT.jar:3.2.0-SNAPSHOT] + at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?] + at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?] + at java.base/java.lang.Thread.run(Thread.java:1575) [?:?] +``` + +I'm not sure where is this pagination option from. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +nit: may be renamed to `buildJoinConditionByFieldName` + + +### @dai-chen on `ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java:None` + + +Can `JoinType.valueOf()` help simplify here? + + +### @dai-chen on `ppl/src/main/antlr/OpenSearchPPLParser.g4:1324` + + +Is this related to this PR? + + +### @dai-chen on `docs/user/ppl/cmd/join.rst:27` + + +Cross join also requires join condition? + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:27` + + +yes. it requires the join condition such as `on 1=1`. BTW, the cross join disabled by default from 3.3.0 + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:75` + + +Good catching! OpenSearch `collapse` cannot work with `search_after` for now. And this pushdown only works when there is a limitation in right size and the limitation size is less than max_window. 
+ + +## General Comments + + +### @RyanL1997 + + +Hi @LantaoJin, are you still working on this change? + + +### @LantaoJin + + +> Hi @LantaoJin, are you still working on this change? + +yes, will submit a new refactor. + + +### @LantaoJin + + +Gentle ping @penghuo @dai-chen @qianheng-aws @yuancu + + +--- + +# PR #3802: [Backport 2.19-dev] Remove unneeded dependency on commons-validator + +**URL:** https://github.com/opensearch-project/sql/pull/3802 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-20T01:39:15Z + +**State:** MERGED + +**Merged:** 2025-06-20T13:59:44Z + +**Changes:** +0 -1 (1 files) + +**Labels:** `maintenance` + + +## Description + +Backport 433bb9ed1aa802e3e73525dadb99558a9e11c6d0 from #3800. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +linkcheck failure should be resolved by https://github.com/opensearch-project/sql/pull/3795 + + +--- + +# PR #3801: Update the limitation docs + +**URL:** https://github.com/opensearch-project/sql/pull/3801 + +**Author:** @LantaoJin + +**Created:** 2025-06-20T01:24:02Z + +**State:** MERGED + +**Merged:** 2025-07-01T07:23:13Z + +**Changes:** +60 -17 (3 files) + +**Labels:** `documentation`, `calcite`, `v3.1.0`, `backport 3.1` + + +## Description + +### Description +Update the limitation docs. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +ping @qianheng-aws @dai-chen @penghuo @Swiddis + + +--- + +# PR #3800: Backport 3797 to 2.x + +**URL:** https://github.com/opensearch-project/sql/pull/3800 + +**Author:** @Swiddis + +**Created:** 2025-06-19T21:20:12Z + +**State:** MERGED + +**Merged:** 2025-06-19T21:33:04Z + +**Changes:** +0 -1 (1 files) + +**Labels:** `maintenance`, `backport 2.19`, `backport 2.19-dev` + + +## Description + +### Description +Backport #3797 to 2.x + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3799: [Backport-main] Remove unneeded dependency on commons-validator + +**URL:** https://github.com/opensearch-project/sql/pull/3799 + +**Author:** @Swiddis + +**Created:** 2025-06-19T21:19:08Z + +**State:** MERGED + +**Merged:** 2025-06-20T01:37:52Z + +**Changes:** +0 -2 (2 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Backport #3797 to main + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3798: Change compare logical when comparing date related fields with string literal + +**URL:** https://github.com/opensearch-project/sql/pull/3798 + +**Author:** @xinyual + +**Created:** 2025-06-19T09:39:41Z + +**State:** MERGED + +**Merged:** 2025-07-02T05:09:59Z + +**Changes:** +246 -21 (10 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `calcite`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +This PR changes the logic for comparing date/time/timestamp fields with a string literal to align with V2. Now we cast the string literal to date/time/timestamp according to the comparison target so that the filter can be pushed down. +For example, previously, when comparing date < '2020-10-20 12:00:00', we cast the date to a timestamp and appended 00:00:00 to it so the two could be compared. Now we cast the string literal to a date, '2020-10-20'. The same applies to time. 
+ +For ppl like +``` +source=XXX | where timestamp_field < '' +``` +for example, +``` +source=XXX | where birthdate < '2020-10-20 00:00:00' +``` +The calcite physical plan change from +``` +EnumerableCalc(expr#0..12=[{inputs}], expr#13=[TIMESTAMP($t3)], expr#14=['2016-12-08 00:00:00':EXPR_TIMESTAMP VARCHAR], expr#15=[>($t13, $t14)], expr#16=['2018-11-09 00:00:00':EXPR_TIMESTAMP VARCHAR], expr#17=[<($t13, $t16)], expr#18=[AND($t15, $t17)], proj#0..12=[{exprs}], $condition=[$t18])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` +to +``` +CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->>($3, '2016-12-08 00:00:00'), FILTER-><($3, '2018-11-09 00:00:00')], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"birthdate\":{\"from\":\"2016-12-08T00:00:00.000Z\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"birthdate\":{\"from\":null,\"to\":\"2018-11-09T00:00:00.000Z\",\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])" +``` + +same as time/date, +for date, the physical plan of PPL +``` +source=opensearch-sql_test_index_date_formats | fields yyyy-MM-dd +| where yyyy-MM-dd > '2016-12-08 00:00:00.123456789' +| where yyyy-MM-dd < '2018-11-09 00:00:00.000000000' +``` + from +``` +EnumerableCalc(expr#0=[{inputs}], expr#1=[TIMESTAMP($t0)], expr#2=['2016-12-08 00:00:00.123456789':EXPR_TIMESTAMP VARCHAR], expr#3=[>($t1, $t2)], expr#4=['2018-11-09 00:00:00':EXPR_TIMESTAMP VARCHAR], expr#5=[<($t1, $t4)], expr#6=[AND($t3, $t5)], yyyy-MM-dd=[$t0], $condition=[$t6])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"yyyy-MM-dd\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` +to +``` +CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->>($0, '2016-12-08'), FILTER-><($0, '2018-11-09')], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"yyyy-MM-dd\":{\"from\":\"2016-12-08\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"yyyy-MM-dd\":{\"from\":null,\"to\":\"2018-11-09\",\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"yyyy-MM-dd\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` + +for time, the physical plan of PPL +``` +source=opensearch-sql_test_index_date_formats | fields custom_time +| where custom_time > '2016-12-08 12:00:00.123456789' +| where custom_time < '2018-11-09 19:00:00.123456789' +``` +from +``` +EnumerableCalc(expr#0=[{inputs}], expr#1=[TIMESTAMP($t0)], expr#2=['2016-12-08 12:00:00.123456789':EXPR_TIMESTAMP VARCHAR], expr#3=[>($t1, $t2)], expr#4=['2018-11-09 19:00:00.123456789':EXPR_TIMESTAMP VARCHAR], expr#5=[<($t1, $t4)], expr#6=[AND($t3, $t5)], custom_time=[$t0], $condition=[$t6])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"custom_time\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` +to +``` +CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->>($0, '12:00:00.123456789'), FILTER-><($0, '19:00:00.123456789')], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"custom_time\":{\"from\":\"12:00:00.123456789\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"custom_time\":{\"from\":null,\"to\":\"19:00:00.123456789\",\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"custom_time\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +``` + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] +#3710 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - CHANGES_REQUESTED + + +https://github.com/opensearch-project/sql/pull/3798#discussion_r2160765041 + + +## Review Comments + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1069` + + +what if date/string comparing in a filter? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +``` +source = big5 | where `@timestamp` >= '2023-01-01 00:00:00.000000000' and `@timestamp` < '2023-01-03 00:00:00.000000000' +``` +Here is an example of big5 query, can you test with similar usage? + + +### @xinyual on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +Yes. Already add IT like this one. 
+ + +### @xinyual on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1069` + + +Date still needs to use timestamp to convert since we need to align with V2. We need to support something like `date('2020-10-20') = '2020-10-20 00:00:00'` since v2 will do the implicit conversion. So comparing date/string cannot be pushed down. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1069` + + +Do you mean `date/string comparing in a filter` can be pushed down in v2, but cannot in v3? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +IMO, the case I listed is different from the one you added. Please try `where birthdate < '2018-11-09 00:00:00.000000000'` + + +### @xinyual on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java:1069` + + +Already changed; now date/string and time/string comparisons can be pushed down as in V2. + + +### @xinyual on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java:None` + + +Got it. Already changed it to use nanoseconds. + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:317` + + +please add javadoc for public static methods + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:354` + + +ditto + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:370` + + +ditto + + +### @xinyual on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:370` + + +Done. + + +### @xinyual on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:354` + + +Done. + + +### @xinyual on `core/src/main/java/org/opensearch/sql/utils/DateTimeUtils.java:317` + + +Done. + + +## General Comments + + +### @LantaoJin + + +Please add a test case in `CalciteExplainIT`. + + +### @xinyual + + +> Please add a test case in `CalciteExplainIT`. 
+ +Already add one in CalcitePPLExplainIT + + +### @LantaoJin + + +> > Please add a test case in `CalciteExplainIT`. +> +> Already add one in CalcitePPLExplainIT + +Please use `CalciteExplainIT`, they are for different purposes. +`CalcitePPLExplainIT` is an IT for `explain` command + + +### @LantaoJin + + +Please update the PR description to add the Calcite physical plans before and after this patching. + + +### @xinyual + + +Force push to resolve dco problem + + +### @xinyual + + +> Please update the PR description to add the Calcite physical plans before and after this patching. + +Already done. Please check it. + + +### @xinyual + + +> > > Please add a test case in `CalciteExplainIT`. +> > +> > +> > Already add one in CalcitePPLExplainIT +> +> Please use `CalciteExplainIT`, they are for different purposes. `CalcitePPLExplainIT` is an IT for `explain` command + +Already put it in correct place. Please check it. + + +### @LantaoJin + + +> Please update the PR description to add the Calcite physical plans before and after this patching. + +@xinyual + + +### @LantaoJin + + +ping @qianheng-aws + + +### @LantaoJin + + +@xinyual could you manually backport it to 2.19-dev? + + +--- + +# PR #3797: Remove unneeded dependency on commons-validator + +**URL:** https://github.com/opensearch-project/sql/pull/3797 + +**Author:** @Swiddis + +**Created:** 2025-06-18T20:39:39Z + +**State:** MERGED + +**Merged:** 2025-06-19T20:02:59Z + +**Changes:** +0 -2 (2 files) + +**Labels:** `backport 2.x`, `backport main` + + +## Description + +### Description + +Well, that was fun. + +After #3782 didn't actually update the beanutils dep, I did a bunch of tracing how we depend on it, such that the resolution isn't actually changing anything. It turns out we import this dependency via `apache.commons.validator`, which [has an open issue to update beanutils as of a couple weeks ago](https://issues.apache.org/jira/browse/VALIDATOR-500). 
So I went to figure out what we depend on the validator for. + +Answer: we don't. The last ref was removed in #2130 ([diff](https://github.com/opensearch-project/sql/pull/2130/files#diff-b9c313c1ef71065a4f4a166763b18b244b1c6d5908cf4ed3512337ce6a4ca98eL11)). + +So removing it will remove pointless CVE chasing, and make our bundle smaller. + +### Related Issues +Same CVE as the last one. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @brianf-aws + + +Hey @Swiddis I'm curious to know how you were able to trace it? Looking to get some insight as our plugin does use org.apache.commons:commons-beanutils. I forced it 1.10 but when I check its gradle cache I can see its pulling in 1.9.4 via a transitive dependency + + +### @Swiddis + + +In our case we had to get it to show dependencies just for our plugin's build task, `gradle :opensearch-sql:dependencies` instead of `gradle dependencies`. I'm not sure what the equivalent task group is for ml-commons. 
+ + +--- + +# PR #3796: change file location and handle concurrence + +**URL:** https://github.com/opensearch-project/sql/pull/3796 + +**Author:** @ahkcs + +**Created:** 2025-06-18T20:38:23Z + +**State:** MERGED + +**Merged:** 2025-06-18T20:42:51Z + +**Changes:** +12 -4 (3 files) + +**Labels:** `maintenance` + + +## Description + +change file location and handle concurrence + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3795: [Backport 2.19-dev] BUILD: fix linkchecker in 2.x + +**URL:** https://github.com/opensearch-project/sql/pull/3795 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-18T17:00:01Z + +**State:** MERGED + +**Merged:** 2025-06-20T13:59:07Z + +**Changes:** +3101 -1 (21 files) + +**Labels:** `infrastructure` + + +## Description + +Backport 7268bc5caaaccd7891c9f9c8b7aa2ff86389f57c from #3793. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +Flaky tests in 2.19-dev: +``` +REPRODUCE WITH: ./gradlew ':integ-test:integTest' --tests "org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testDateFormatAndDatetimeAndFromDays" -Dtests.seed=B00BC761C9607C01 -Dtests.security.manager=false -Dtests.locale=ja-JP-u-ca-japanese-x-lvariant-JP -Dtests.timezone=America/Rainy_River -Druntime.java=17 +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testDateFormatAndDatetimeAndFromDays FAILED + java.lang.AssertionError: + Expected: iterable with items [[1984-04-12, 1984-Apr-12th 12:00:00 AM, 12.04.84 9:07 AM, 09:07:42, Wed 08 26 13 01 239 13 August 00 00 Wednesday 3 %, 2017-11-02, 1984-04-12 23:07:42, 1984-04-12 15:07:42, 15 1984 15, 15 15 1984, 09:07:42.000123]] in any order + but: not matched: <["1984-04-12","1984-Apr-12th 
12:00:00 AM","12.04.84 9:07 AM","09:07:42","Wed 08 26 13 01 239 13 August 00 00 Wednesday 3 %","2017-11-02","1984-04-12 23:07:42","1984-04-12 15:07:42","14 1984 14","15 15 1984","09:07:42.000123"]> + at __randomizedtesting.SeedInfo.seed([B00BC761C9607C01:3D38B329F58C4320]:0) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:18) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:6) + at org.opensearch.sql.util.MatcherUtils.verify(MatcherUtils.java:188) + at org.opensearch.sql.util.MatcherUtils.verifyDataRows(MatcherUtils.java:160) + at org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testDateFormatAndDatetimeAndFromDays(CalcitePPLDateTimeBuiltinFunctionIT.java:1009) + +REPRODUCE WITH: ./gradlew ':integ-test:integTest' --tests "org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testYearWeek" -Dtests.seed=B00BC761C9607C01 -Dtests.security.manager=false -Dtests.locale=ja-JP-u-ca-japanese-x-lvariant-JP -Dtests.timezone=America/Rainy_River -Druntime.java=17 +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testYearWeek FAILED + java.lang.AssertionError: + Expected: iterable with items [[198415, 202524, 198415, 202034, 201901]] in any order + but: not matched: <[198414,202524,198414,202034,201901]> + at __randomizedtesting.SeedInfo.seed([B00BC761C9607C01:CF4F843FCDC68BBB]:0) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:18) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:6) + at org.opensearch.sql.util.MatcherUtils.verify(MatcherUtils.java:188) + at org.opensearch.sql.util.MatcherUtils.verifyDataRows(MatcherUtils.java:160) + at org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testYearWeek(CalcitePPLDateTimeBuiltinFunctionIT.java:494) + +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testTimeStrToDate PASSED + + +REPRODUCE WITH: ./gradlew ':integ-test:integTest' --tests 
"org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testWeekAndWeekOfYearWithFilter" -Dtests.seed=B00BC761C9607C01 -Dtests.security.manager=false -Dtests.locale=ja-JP-u-ca-japanese-x-lvariant-JP -Dtests.timezone=America/Rainy_River -Druntime.java=17 +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testWeekAndWeekOfYearWithFilter FAILED + java.lang.AssertionError: + Expected: iterable with items [[2]] in any order + but: not matched: <[0]> + at __randomizedtesting.SeedInfo.seed([B00BC761C9607C01:A9A6B15FBA9E99D6]:0) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:18) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:6) + at org.opensearch.sql.util.MatcherUtils.verify(MatcherUtils.java:188) + at org.opensearch.sql.util.MatcherUtils.verifyDataRows(MatcherUtils.java:160) + at org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testWeekAndWeekOfYearWithFilter(CalcitePPLDateTimeBuiltinFunctionIT.java:438) + +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testSysdate PASSED + +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testTimestampWithTimeInput PASSED + +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testTime PASSED + +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testExtractWithSimpleFormats PASSED + + +REPRODUCE WITH: ./gradlew ':integ-test:integTest' --tests "org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testWeekAndWeekOfYear" -Dtests.seed=B00BC761C9607C01 -Dtests.security.manager=false -Dtests.locale=ja-JP-u-ca-japanese-x-lvariant-JP -Dtests.timezone=America/Rainy_River -Druntime.java=17 +org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT > testWeekAndWeekOfYear FAILED + java.lang.AssertionError: + Expected: iterable with items [[15, 15, 15, 15, 7, 8]] in any order + but: not matched: 
<[14,14,15,15,7,7]> + at __randomizedtesting.SeedInfo.seed([B00BC761C9607C01:EB38628B74D202C0]:0) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:18) + at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:6) + at org.opensearch.sql.util.MatcherUtils.verify(MatcherUtils.java:188) + at org.opensearch.sql.util.MatcherUtils.verifyDataRows(MatcherUtils.java:160) + at org.opensearch.sql.calcite.standalone.CalcitePPLDateTimeBuiltinFunctionIT.testWeekAndWeekOfYear(CalcitePPLDateTimeBuiltinFunctionIT.java:421) +``` +cc @yuancu + +@penghuo can you help to merge this? Which is fix linkchecker failures. + + +--- + +# PR #3793: BUILD: fix linkchecker in 2.x + +**URL:** https://github.com/opensearch-project/sql/pull/3793 + +**Author:** @LantaoJin + +**Created:** 2025-06-18T08:19:22Z + +**State:** MERGED + +**Merged:** 2025-06-18T16:59:47Z + +**Changes:** +3101 -1 (21 files) + +**Labels:** `infrastructure`, `backport 2.19-dev` + + +## Description + +### Description +Linkchecker failed with network error: +``` +Run /home/runner/work/_actions/lycheeverse/lychee-action/master/entrypoint.sh + /home/runner/work/_actions/lycheeverse/lychee-action/master/entrypoint.sh + shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} + env: + GITHUB_TOKEN: *** + INPUT_TOKEN: *** + INPUT_ARGS: --accept=200,403,429,999 "./**/*.html" "./**/*.md" "./**/*.txt" --exclude "[https://aws.oss.sonatype.*](https://aws.oss.sonatype.%2A/)" "http://localhost*" "https://localhost" "https://odfe-node1:9200/" "https://community.tableau.com/docs/DOC-17978" ".*family.zzz" "https://pypi.python.org/pypi/opensearchsql/" "opensearch*" ".*@amazon.com" ".*email.com" "git@github.com" "http://timestamp.verisign.com/scripts/timstamp.dll" ".*/PowerBIConnector/bin/Release" + INPUT_DEBUG: false + INPUT_FAIL: true + INPUT_FAILIFEMPTY: true + INPUT_FORMAT: markdown + INPUT_JOBSUMMARY: true + INPUT_CHECKBOX: true + INPUT_OUTPUT: lychee/out.md +Error: Network error + +Caused by: + 0: error sending request 
for url (https://localhost/) + 1: client error (Connect) + 2: tcp connect error: Connection refused (os error 111) + 3: Connection refused (os error 111) +``` + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +After patching: +Screenshot 2025-06-18 at 16 44 38 + + + +--- + +# PR #3791: [Backport 2.19-dev] Resolve commons beanutils to 1.11.0 + +**URL:** https://github.com/opensearch-project/sql/pull/3791 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-18T05:49:19Z + +**State:** MERGED + +**Merged:** 2025-06-18T17:00:13Z + +**Changes:** +1 -0 (1 files) + + +## Description + +#3782 not works for 2.x. 
Upgrade `commons-beanutils` to `1.11.0` + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +linkchecker failure issue should be resolved by https://github.com/opensearch-project/sql/pull/3793 + + +--- + +# PR #3790: [Backport 2.19-dev] Migrate standalone integration tests to remote tests + +**URL:** https://github.com/opensearch-project/sql/pull/3790 + +**Author:** @yuancu + +**Created:** 2025-06-18T02:54:02Z + +**State:** MERGED + +**Merged:** 2025-06-18T15:42:10Z + +**Changes:** +3056 -2957 (75 files) + + +## Description + +### Description +Migrate standalone integration tests to remote tests (#3778) + + +* Use PPL_SPEC in standalone ITs + +Additionally: +- Migrate CalcitePPLBasicIT to remote ITs +- Delete CalcitePPLBasicPushdownIT since the remote ITs enable pushdown by default + + + +* Migrate array, aggregation, appendcol, and datetime ITs to remote + + + +* Migrate case, datetime, condition, cryptographic function ITs + + + +* Fix return types in CalcitePPLTpchIT + + + +* migrate calcite IT and delete related pushdown IT + + + +* Temporarily move datetime ITs back to standalone mode + + + +* WIP: trying to enbale no-pushdown tests + + + +* add pushdown IT button + + + +* Fix remote ITs + + + +* Enable no pushdown tests + + + +* Change pushdown settings only when necessary (fixing CalciteSettingsIT) + + + +* Fix sum null tests with and without pushdown + + + +* Update pushdown with transient flag + + + +* Ignore IP comparison and explain ITs in CalciteNoPushdownIT + + + +* Delete CalcitePushdownIT since all tests are run with pushdown enabled by default + + + +* Restore pushdown config after CalciteNoPushdownIT class + + + +* Move expected explain strings to files to circurmvent issues related to comparison involves windows new lines + + + +* Throw narrower exceptions where it uses loadFromFile + + + +* Replace window line enddings with unix 
line enddings for remote tests' responses + + + +* Revert "Replace window line enddings with unix line enddings for remote tests' responses" + +This reverts commit 812fd0dc4ef511833bb43211a66f09362230986a. + + + +* Use explainQueryToString for explain ITs + + + +* Use executeWithReplace for explain command variants + + + +* Migrate CalcitePPLTpchIT to remote test + + + +* Replace windows carriages with spaces when sanitizing queries to explain + + + +--------- + + + + +(cherry picked from commit 918c153ca99565e304364a5765f950aab86df64a) + +### Related Issues + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @yuancu + + +CI Fixed; using force-push to make the commit history cleaner. + + +### @LantaoJin + + +Link Checker issue should be resolved by https://github.com/opensearch-project/sql/pull/3793 + + +### @LantaoJin + + +@penghuo can you help to merge this? + + +--- + +# PR #3788: [Backport main] Add 3.1.0 release notes + +**URL:** https://github.com/opensearch-project/sql/pull/3788 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-17T14:09:09Z + +**State:** MERGED + +**Merged:** 2025-06-17T16:01:54Z + +**Changes:** +57 -0 (1 files) + + +## Description + +Backport 8f76f8fa37836ecf927b21ede6a685977d3a56b0 from #3781. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3787: [Backport main] Resolve commons beanutils to 2.0 + +**URL:** https://github.com/opensearch-project/sql/pull/3787 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-06-17T14:07:54Z + +**State:** MERGED + +**Merged:** 2025-06-17T16:02:59Z + +**Changes:** +1 -0 (1 files) + + +## Description + +Backport ee8f30db3ced0c488f390807cc587bf923ee90c3 from #3782. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3786: Add explain ITs with Calcite without pushdown + +**URL:** https://github.com/opensearch-project/sql/pull/3786 + +**Author:** @yuancu + +**Created:** 2025-06-17T13:44:39Z + +**State:** MERGED + +**Merged:** 2025-07-11T03:37:25Z + +**Changes:** +269 -161 (41 files) + +**Labels:** `maintenance`, `testing`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Add Integration tests for `explain` command with pushdown disabled in Calcite + +### Related Issues +Resolves #3785 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:432` + + +In the future, just wonder in which case we may disable pushdown? Is this only for testing purpose? 
+ + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java:432` + + +For testing purpose, we need to confirm all operations running in Calcite could meet our expects. We wouldn't disable pushdown config. But all operators should work as expected without pushdown. For example, filter operator can be pushdown in normal. But it cannot be pushdown after join operator. + + +## General Comments + + +### @qianheng-aws + + +@yuancu conflict + + +### @yuancu + + +Will re-work on this after merging #3835 + + +### @yuancu + + +The failed test case can be reproduced with the following command in the main branch: + +```bash +./gradlew ':integ-test:yamlRestTest' --tests "org.opensearch.sql.rest.RestHandlerClientYamlTestSuiteIT.test {yaml=issues/3102/Prevent push down limit if the offset reach max_result_window}" -Dtests.seed=37CD9BA42072D18E +``` + +Note that it can only be reproduced with the given seed. + +I haven't figured out what went wrong. + +Relevant issue: #3102 + +
    + + details + +- test + +```yaml +"Prevent push down limit if the offset reach max_result_window": + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test | head 1 from 1 ' + - match: {"total": 1} + - match: {"schema": [{"name": "id", "type": "bigint"}]} + - match: {"datarows": [[2]]} + + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test | head 2 | head 1 from 1 ' + - match: { "total": 1 } + - match: { "schema": [ { "name": "id", "type": "bigint" } ] } + - match: { "datarows": [ [ 2 ] ] } +``` +- stacktrace +``` + java.lang.AssertionError: Failure at [issues/3102:25]: got unexpected warning header [ + 299 OpenSearch-3.1.0-SNAPSHOT-aed1264b0b385a9735ce1f5156ddd56506f109c3 "Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled" + ] + at __randomizedtesting.SeedInfo.seed([37CD9BA42072D18E:BF99A47E8E8EBC76]:0) + at org.opensearch.test.rest.yaml.OpenSearchClientYamlSuiteTestCase.executeSection(OpenSearchClientYamlSuiteTestCase.java:460) + at org.opensearch.test.rest.yaml.OpenSearchClientYamlSuiteTestCase.test(OpenSearchClientYamlSuiteTestCase.java:433) + at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103) + at java.base/java.lang.reflect.Method.invoke(Method.java:580) + at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) + at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) + at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) + at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) + at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at org.junit.rules.RunRules.evaluate(RunRules.java:20) + at org.apache.lucene.tests.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:48) + at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) + at org.apache.lucene.tests.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:45) + at org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) + at org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) + at org.junit.rules.RunRules.evaluate(RunRules.java:20) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) + at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) + at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) + at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) + at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) + at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) + at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) + at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at org.apache.lucene.tests.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:38) + at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) + at 
com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at org.apache.lucene.tests.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) + at org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) + at org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) + at org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) + at org.apache.lucene.tests.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:47) + at org.junit.rules.RunRules.evaluate(RunRules.java:20) + at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) + at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) + at java.base/java.lang.Thread.run(Thread.java:1575) + + Caused by: + java.lang.AssertionError: got unexpected warning header [ + 299 OpenSearch-3.1.0-SNAPSHOT-aed1264b0b385a9735ce1f5156ddd56506f109c3 "Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled" + ] + at org.junit.Assert.fail(Assert.java:89) + at org.opensearch.test.rest.yaml.section.DoSection.checkWarningHeaders(DoSection.java:395) + at org.opensearch.test.rest.yaml.section.DoSection.execute(DoSection.java:323) + at org.opensearch.test.rest.yaml.OpenSearchClientYamlSuiteTestCase.executeSection(OpenSearchClientYamlSuiteTestCase.java:449) + ... 39 more +``` + +
    + + +--- + +# PR #3783: Add unified query API for external integration + +**URL:** https://github.com/opensearch-project/sql/pull/3783 + +**Author:** @dai-chen + +**Created:** 2025-06-16T22:59:46Z + +**State:** MERGED + +**Merged:** 2025-06-24T18:01:02Z + +**Changes:** +554 -1 (6 files) + +**Labels:** `enhancement` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces a new api module containing the `UnifiedQueryPlanner` class, which provides a high-level interface for parsing and planning PPL queries. This module is designed to support external consumers such as Spark and CLI without exposing Calcite or OpenSearch internals. README and unit tests are included to document usage and verify correctness. + +### Related Issues + +Resolves https://github.com/opensearch-project/sql/issues/3734 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `settings.gradle:11` + + +~~Should we include it as part of the plugin snapshot and keep the plugin as a multipurposed jar like this? Seems like something where we should make the core SQL functionality a library, the opensearch plugin is one target that we can build, and the programmatic API is a different target.~~ Ok if I'm understanding right this is being shipped as its own library already as part of the module list earlier, it just seems odd to also put it in the OS plugin if the OS plugin isn't using it + + +### @Swiddis on `api/README.md:21` + + +How does one execute the plan after they receive it? 
+ + +### @Swiddis on `api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java:92` + + +Is there a specific technical reason we can't include SQL yet? This is just using the ANTLR parser, right? + + +### @dai-chen on `api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java:92` + + +Because we haven't decided how to unify SQL yet. For example, we can enable SQL by Calcite parser directly or by our own ANTLR parser and AstBuilder. + + +### @dai-chen on `settings.gradle:11` + + +Right, for now it is for external consumer. Later we can refactor `plugin` module to use this unified API as well. + + +### @dai-chen on `api/README.md:21` + + +Good question. Currently, the plan isn’t directly executable. As noted in the README, the planner is designed to eventually return an executable plan—either a Calcite physical plan for immediate execution in the current JVM (useful for the OpenSearch plugin and CLI), or a SparkSQL plan for distributed execution by Spark (useful for PPL in Spark). + +I initially considered designing the API this way, but haven’t yet found a clean way to model everything within Calcite’s optimizer. I plan to work on this later, especially since PPL in Spark Phase 2 may require it. + + +### @Swiddis on `api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java:92` + + +My understanding was that Calcite came after parsing, does Calcite also have a parser? Good to know + + +### @dai-chen on `api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java:92` + + +Yes, Calcite itself has its own parser. You can find more details in their documentation and paper. + + +## General Comments + + +### @dai-chen + + +@LantaoJin @penghuo Please have a look when you have a moment. This is currently only for initial phase in https://github.com/opensearch-project/opensearch-spark/issues/1136 so we can begin publishing PRs on Spark side. Thanks! + + +### @dai-chen + + +There seems flaky test. 
+ +``` +2025-06-24T17:39:21.3968640Z 3577 tests completed, 1 failed, 540 skipped +2025-06-24T17:39:21.3969870Z Tests with failures: +2025-06-24T17:39:21.4097140Z - org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT.testQ19 +``` + + +--- + +# PR #3782: Resolve commons beanutils to 2.0 + +**URL:** https://github.com/opensearch-project/sql/pull/3782 + +**Author:** @Swiddis + +**Created:** 2025-06-16T22:37:25Z + +**State:** MERGED + +**Merged:** 2025-06-17T14:07:36Z + +**Changes:** +1 -0 (1 files) + +**Labels:** `backport 2.x`, `backport main`, `maintenance`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Resolves CVE nits for 3.1 release -- vuln doesn't actually affect us, but you know how CVE tooling is + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3781: Add 3.1.0 release notes + +**URL:** https://github.com/opensearch-project/sql/pull/3781 + +**Author:** @Swiddis + +**Created:** 2025-06-16T22:33:16Z + +**State:** MERGED + +**Merged:** 2025-06-17T14:08:53Z + +**Changes:** +57 -0 (1 files) + +**Labels:** `backport main`, `maintenance` + + +## Description + +### Description +3.1 release notes + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `release-notes/opensearch-sql.release-notes-3.1.0.0.md:46` + + +maybe we should treat https://github.com/opensearch-project/sql/pull/3623 as a breaking change: +`query.size_limit` has been misused as the maximum amount for fetching documents from OpenSearch, causing many data correctness issues. It has now been fixed to be the maximum number of rows output by a query. + + +### @LantaoJin on `release-notes/opensearch-sql.release-notes-3.1.0.0.md:46` + + +Because of that, the "maximum amount of fetching documents from OpenSearch" is unlimited. (actually value is `Integer.MAX_VALUE`, requestedTotalSize=2147483647). Query on big data set may fail with CircuitBreakingException. ref #3779 + + +## General Comments + + +### @LantaoJin + + +Can we merge https://github.com/opensearch-project/sql/pull/3782 first and add it to release notes? + + +--- + +# PR #3778: Migrate standalone integration tests to remote tests + +**URL:** https://github.com/opensearch-project/sql/pull/3778 + +**Author:** @yuancu + +**Created:** 2025-06-16T07:17:04Z + +**State:** MERGED + +**Merged:** 2025-06-17T09:14:25Z + +**Changes:** +2632 -3122 (73 files) + +**Labels:** `maintenance`, `backport-manually`, `backport-failed`, `testing`, `calcite`, `backport 2.19-dev` + + +## Description + +### Description +Migrate standalone integration tests to remote tests to better align with production environments. + +Additionally, I created a test suite called `CalciteNoPushdownIT`. It will run all remote tests without pushdown enabled. This replaces original IT classes with names like `Calcite***PushdownIT`.
+ +Implementation notes: +- `CalcitePPLDateTimeBuiltinFunctionIT` is not migrated since the timezone and locale of the cluster started locally differ from the settings of the test environment. E.g. I can't make `-Dtests.locale=my-Mymr-MM -Dtests.timezone=PST` take effect on the local opensearch cluster. (To fix in the future) +- `CalciteExplainIT` is not added to no-pushdown tests yet. It requires corresponding plans with Calcite without pushdown. +- `CalciteIPComparisonIT` is not added to no-pushdown tests yet. It requires fixing IP comparison with Calcite #3776 + +### Related Issues +Resolves #3777 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java:54` + + +Can TpchIT be migrated to remote IT? + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java:54` + + +Yes. Migrated.
+ + +## General Comments + + +### @LantaoJin + + +@yuancu could you help to backport it to 2.19-dev + + +### @yuancu + + +> @yuancu could you help to backport it to 2.19-dev + +Sure + + +--- + +# PR #3774: test2 + +**URL:** https://github.com/opensearch-project/sql/pull/3774 + +**Author:** @ahkcs + +**Created:** 2025-06-12T23:58:34Z + +**State:** MERGED + +**Merged:** 2025-06-12T23:58:54Z + +**Changes:** +96 -67 (6 files) + + +## Description + +test2 + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3773: test + +**URL:** https://github.com/opensearch-project/sql/pull/3773 + +**Author:** @ahkcs + +**Created:** 2025-06-12T23:54:18Z + +**State:** MERGED + +**Merged:** 2025-06-12T23:56:17Z + +**Changes:** +95 -67 (5 files) + + +## Description + +test + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3772: test code + +**URL:** https://github.com/opensearch-project/sql/pull/3772 + +**Author:** @ahkcs + +**Created:** 2025-06-12T23:51:16Z + +**State:** MERGED + +**Merged:** 2025-06-12T23:51:34Z + +**Changes:** +94 -67 (4 files) + + +## Description + +test code + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3770: only publish snapshots when code changed + +**URL:** https://github.com/opensearch-project/sql/pull/3770 + +**Author:** @ahkcs + +**Created:** 2025-06-12T23:43:11Z + +**State:** MERGED + +**Merged:** 2025-06-12T23:43:58Z + +**Changes:** +93 -67 (3 files) + + +## Description + +only publish snapshots when code changed + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No 
inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3769: Test code fix + +**URL:** https://github.com/opensearch-project/sql/pull/3769 + +**Author:** @ahkcs + +**Created:** 2025-06-12T22:27:23Z + +**State:** MERGED + +**Merged:** 2025-06-12T22:29:14Z + +**Changes:** +4 -4 (1 files) + + +## Description + +Test code fix + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3768: Test code fix + +**URL:** https://github.com/opensearch-project/sql/pull/3768 + +**Author:** @ahkcs + +**Created:** 2025-06-12T22:11:17Z + +**State:** MERGED + +**Merged:** 2025-06-12T22:15:47Z + +**Changes:** +7 -7 (2 files) + + +## Description + +Test code fix + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3767: Test code fix + +**URL:** https://github.com/opensearch-project/sql/pull/3767 + +**Author:** @ahkcs + +**Created:** 2025-06-12T21:49:07Z + +**State:** MERGED + +**Merged:** 2025-06-12T21:59:40Z + +**Changes:** +428 -400 (2 files) + + +## Description + +Test code fix + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3763: Publish internal modules separately for downstream reuse + +**URL:** https://github.com/opensearch-project/sql/pull/3763 + +**Author:** @dai-chen + +**Created:** 2025-06-11T23:44:42Z + +**State:** MERGED + +**Merged:** 2025-06-16T15:56:55Z + +**Changes:** +90 -0 (2 files) + +**Labels:** `infrastructure` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR implements **Option A: Modular Publishing** from the Granularity 
section of the design. It introduces the ability to publish internal modules (e.g., `core`, `ppl`, `opensearch`) as separate Maven artifacts. This enables downstream consumers such as Spark to depend only on the components they require. + +Current naming proposal (subject to feedback): + +- Group ID: `org.opensearch.query` — places all artifacts under a single `query/` folder in the Maven repository. +- Artifact ID: `unified-query-<module>`, e.g., unified-query-core, unified-query-ppl. +- Versioning: Currently follows the OpenSearch version (e.g., 3.1.0.0-SNAPSHOT), but may be decoupled in the future — similar to async-query-core, which uses its own versioning (e.g., 1.0.0). + +#### Local Publishing Test + +Example after running the local publish command: + +
    +$ ./gradlew publishUnifiedQueryPublicationToMavenLocal
    +
    +$ pwd
    +~/.m2/repository/org/opensearch
    +$ tree .
    +.
    +├── plugin  <-- no impact on plugin publishing
    +│   └── opensearch-sql-plugin
    +│       └── 3.1.0.0-SNAPSHOT
    +└── query
    +    ├── unified-query-common
    +    │   └── 3.1.0.0-SNAPSHOT
    +    ├── unified-query-core
    +    │   └── 3.1.0.0-SNAPSHOT
    +    ├── unified-query-opensearch
    +    │   └── 3.1.0.0-SNAPSHOT
    +    ├── unified-query-ppl
    +    │   └── 3.1.0.0-SNAPSHOT
    +    ├── unified-query-protocol
    +    │   └── 3.1.0.0-SNAPSHOT
    +    └── unified-query-sql
    +        └── 3.1.0.0-SNAPSHOT
    +
    + +#### Spark Integration Test + +Verified dependency resolution and query execution using ongoing Calcite 2.19 PR code and local Maven artifacts. + +
    +# Checkout Calcite backport draft PR #3752 and patch this PR changes
    +$ git fetch upstream pull/3752/head:pr-3752
    +$ git checkout pr-3752
    +
    +# Build by JDK 11 and publish 2.19 artifacts locally
    +$ java -version
    +openjdk version "11.0.25" 2024-10-15 LTS
    +OpenJDK Runtime Environment Corretto-11.0.25.9.1 (build 11.0.25+9-LTS)
    +OpenJDK 64-Bit Server VM Corretto-11.0.25.9.1 (build 11.0.25+9-LTS, mixed mode)
    +$ ./gradlew clean publishUnifiedQueryPublicationToMavenLocal
    +.
    +├── plugin
    +│   └── opensearch-sql-plugin
    +└── query
    +    ├── unified-query-common
    +    │   └── 2.19.3.0-SNAPSHOT
    +    ├── unified-query-core
    +    │   └── 2.19.3.0-SNAPSHOT
    +    ├── unified-query-opensearch
    +    │   └── 2.19.3.0-SNAPSHOT
    +    ├── unified-query-ppl
    +    │   └── 2.19.3.0-SNAPSHOT
    +    ├── unified-query-protocol
    +    │   └── 2.19.3.0-SNAPSHOT
    +    └── unified-query-sql
    +
    +# Add dependency to Spark: https://github.com/dai-chen/opensearch-spark/tree/verify-unified-ppl-dependency
    +    resolvers ++= Seq(
    +      "Local Maven Repository" at "file://" + Path.userHome.absolutePath + "/.m2/repository",
    +      "OpenSearch Snapshots" at "https://aws.oss.sonatype.org/content/repositories/snapshots/",
    +      "JitPack" at "https://jitpack.io"
    +    ),
    +    libraryDependencies ++= Seq(
    +      "com.amazonaws" % "aws-java-sdk" % "1.12.397" % "provided"
    +        exclude ("com.fasterxml.jackson.core", "jackson-databind"),
    +      ...
    +      "org.opensearch.query" % "unified-query-ppl" % "2.19.3.0-SNAPSHOT"
    +        exclude("org.opensearch.query", "unified-query-protocol")
    +        exclude("org.opensearch.query", "unified-query-opensearch")),
    +
    +# Run IT with new FlintSparkPPLCalciteParser registered to Spark extension
    +25/06/11 19:40:10 INFO FlintSparkPPLCalciteParser: 
    + PPL => SparkSQL
    +   PPL query: source = spark_catalog.default.flint_ppl_test | eval f = GET_FORMAT(DATE, 'USA') | fields f
    +   SQL query: SELECT `GET_FORMAT`('DATE', 'USA') `f`
    +FROM `spark_catalog`.`default`.`flint_ppl_test`
    +
    + +### Related Issues + +Resolves part 1 in https://github.com/opensearch-project/sql/issues/3734. + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `build.gradle:None` + + +could we use "org.opensearch.ql", "org.opensearch.query"? + + +### @dai-chen on `build.gradle:None` + + +Sure, addressed in https://github.com/opensearch-project/sql/pull/3763/commits/b14a5d11ccc9ff28e71c92e2ac61f2f6c35ee2b9. + + +## General Comments + + +### @LantaoJin + + +Why not change the Group ID to `org.opensearch.unified-query`? + + +### @Swiddis + + +> Why not change the Group ID to org.opensearch.unified-query? + +I like being able to find the repo corresponding to a given Sonatype package as a 1-1 mapping, `sql` hasn't been strictly `sql` for a while so I think it's fine to stay consistent + + + +### @dai-chen + + +@LantaoJin @Swiddis I decided to go with `query` to align with existing usage in our plugin. Thanks! + +@peterzhuamazon Just checking if any concerns or procedures we should follow to publish multiple JARs from the SQL plugin to snapshot repository (https://aws.oss.sonatype.org/content/repositories/snapshots/)? The motivation for this can be found in the related issue above. 
+ + +--- + +# PR #3760: Fix field not found issue in join output when column names are ambiguous + +**URL:** https://github.com/opensearch-project/sql/pull/3760 + +**Author:** @LantaoJin + +**Created:** 2025-06-10T07:13:17Z + +**State:** MERGED + +**Merged:** 2025-06-11T02:49:47Z + +**Changes:** +548 -370 (12 files) + +**Labels:** `PPL`, `calcite` + + +## Description + +### Description +``` +source=table1 | join left=t1 right=t2 on t1.id=t2.id table2 | eval a = 1 | fields t1.id, t2.id +``` +Fails with {alias=t2, fieldName=id} field not found + +#### Explanation +Join outputs could contain duplicated column name, typically the join criteria `ON t1.id = t2.id`. The `id` in output of join operator is ambiguous. Calcite will rename the ambiguous column name with numeric suffix, the output generates `id` and `id0` when a new project add to stack. + +#### Solution +To avoid `id0`, this PR renames the `id0` to `<alias>.id`, or `<tableName>.id` if no alias exists. + +#### Example +table1: +| id | +| --- | +| 1 | +| 2 | + +table2 +| id | +| --- | +| 1 | +| 3 | +``` +source=table1 | join left=t1 right=t2 on t1.id=t2.id table2 | eval a = 1 +``` +outputs `t1.id`, `t2.id` and `a` + +``` +source=table1 | join on table1.id=table2.id table2 | eval a = 1 +``` +outputs `table1.id`, `table2.id` and `a` + +``` +source=table1 | join on table1.id=t2.id table2 as t2 | eval a = 1 +``` +outputs `table1.id`, `t2.id` and `a` + +``` +source=table1 | join right=tt on table1.id=t2.id [ source=table2 as t2 | eval b = id ] | eval a = 1 +``` +outputs `table1.id`, `tt.id`, `tt.b` and `a` + +### Related Issues +Resolves #3617 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/join.rst:139` + + +does `table1.id=tt.id` works? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:554` + + +Does it restrict joins between tables in the same database? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:547` + + +Just to confirm: does these logic below work for multi-join query plan tree recursively? + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:139` + + +it works + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:554` + + +I don't think so, the table name is just an `aliasField` to find the fieldName in RelBuilder. Ref https://github.com/apache/calcite/blob/15e0a3b56f7e48858b19127120b84987040c43dd/core/src/main/java/org/apache/calcite/tools/RelBuilder.java#L620-L622 + +But it brings me that one corner case cannot work: +If the join between tables in different databases contains same names. For example `db1.tableA join db2.tableA` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:547` + + +Yes. + + +## General Comments + + +### @LantaoJin + + +ping @dai-chen @penghuo for additional review. + + +--- + +# PR #3752: [backport to 2.19-dev] Backport calcite prs + +**URL:** https://github.com/opensearch-project/sql/pull/3752 + +**Author:** @xinyual + +**Created:** 2025-06-10T02:27:31Z + +**State:** MERGED + +**Merged:** 2025-06-16T15:15:07Z + +**Changes:** +76960 -2577 (791 files) + + +## Description + +### Description +The PR backport all calcite feature PRs and some pre-required PR to 2.19-dev. +The PR is based on 2.19.0-SNAPSHOT and delete maven-publish to avoid to publish to maven repo. 
+Some dependencies' version are fixed from [core](https://github.com/opensearch-project/OpenSearch/blob/ec5addab82d459743c5c6bb579e6573ecd610e03/gradle/libs.versions.toml#L31) +Backporting all PRs in main branch Starting from #3249 to now. +It also includes some PRs not related to calcite but as pre-required to simplify conflict resolve: +#3036 +#3032 +#3071 +#3145 +#3230 +#3246 + + +Remaining PR from #3249 not backported + +#3256 CI: Fix broken url for link-checker CI build +#3254 Unused PPL EARLIEST and LATEST aggregations +#3255 CI: BWC testsuite +#3244 CI: Geo-Spatial Plugin integration test (need for geo IP) +#3259 CI: Syntax fix for Jekyll build +#3228 PPL command expression implementation for geoip(need for geo ip) +#3274 Update PPL parse command doc with limitation +#3299 Fixed week of week based year handling +#3300 [Release 2.19] Add release notes for 2.19 +#3294 Build: Centralise dependencies version - Pt1 +#3304 Add other functions to SQL query validator +#3306 [v3.0.0] Remove SparkSQL support +#3243 PPL: Add json function and cast(x as json) function +#2891 Remove dependency from async-query-core to datasources +#3326 Remove opendistro settings and endpoints +#3319 [Release 3.0] Bump gradle 8.10.2 / JDK23 / 3.0.0.0-alpha1 on SQL plugin +#3337 [v3.0.0] Deprecate SQL Delete statement +#3278 Clean up syntax error reporting +#3367 [v3.0.0] Deprecate OpenSearch DSL format +#3414 Add release notes on version 3.0.0.0-alpha1 +#3434 Merge main for OpenSearch 3.0 release. +#3469 Set bouncycastle version inline +#3484 CVE-2024-57699 High: Fix json-smart vulnerability +#3489 Bump SQL main to version 3.0.0.0-beta1 +#3515 Fix: CSV handling of embedded crlf +#3539 Fix build due to phasing off SecurityManager usage in favor of Java Agent +#3269 Add SQLQuery Utils support for Vaccum queries +#3551 Using java-agent gradle plugin to phase off Security Manager in favor of Java-agent. 
+#3442 Fix issue 2489 [backported] +#3649 create a new directory org/opensearch/direct-query/ +#3671 [Fix CVE-2025-47273]Bump setuptools to 78.1.1 [backported] +#3604 Implement geoip udf with Calcite (need for geo ip) [backported] +#3660 Fix: Long IN-lists causes crash [backported] +#3715 Modified workflow: Grammar Files & Async Query Core +#3588 [AUTO] Increment version to 3.1.0-SNAPSHOT +#3589 Remove beta1 qualifier +#3592 Add release notes on version 3.0.0.0 + + + + +The difference in functionality between this PR and main: +1. After calcite enabled, datetime won't be allowed. + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +The link checker failure seems due to https://github.com/opensearch-project/sql/blob/0f197e5be89b1916a51eb0bb7b17ceff17efbaa1/.github/workflows/link-checker.yml#L8 + + +### @LantaoJin + + +| Link | Description | backport to 2.19-dev | +| --- | :-- | :-: | +| #3256 | CI: Fix broken url for link-checker CI build | no | +| #3254 | Unused PPL EARLIEST and LATEST aggregations | no | +| #3255 | CI: BWC testsuite | no | +| #3244 | CI: Geo-Spatial Plugin integration test (need for geo IP) | no | +| #3259 | CI: Syntax fix for Jekyll build | no | +| #3228 | PPL command expression implementation for geoip(need for geo ip) | no | +| #3274 | Update PPL parse command doc with limitation | yes | +| #3299 | Fixed week of week based year handling | yes | +| #3300 | [Release 2.19] Add release notes for 2.19 | no | +| #3294 | Build: Centralise dependencies version - Pt1 | no | +| #3304 | Add other functions to SQL query validator | yes | +| #3306 | [v3.0.0] Remove SparkSQL support | no | +| #3243 | PPL: Add json function and cast(x as json) function | no | +| #2891 | Remove dependency from async-query-core to datasources | no | +| #3326 | Remove opendistro settings and endpoints | no | +| #3319 | [Release 3.0] Bump gradle 8.10.2 / JDK23 / 3.0.0.0-alpha1 on SQL plugin | no | +| #3337 | [v3.0.0] Deprecate SQL Delete statement | no | +| #3278 | Clean up syntax error reporting | no | +| #3367 | [v3.0.0] Deprecate OpenSearch DSL format | no | +| #3414 | Add release notes on version 3.0.0.0-alpha1 | no | +| #3434 | Merge main for OpenSearch 3.0 release. 
| no | +| #3469 | Set bouncycastle version inline | no | +| #3484 | CVE-2024-57699 High: Fix json-smart vulnerability | yes | +| #3489 | Bump SQL main to version 3.0.0.0-beta1 | no | +| #3515 | Fix: CSV handling of embedded crlf | yes | +| #3539 | Fix build due to phasing off SecurityManager usage in favor of Java Agent | no | +| #3269 | Add SQLQuery Utils support for Vaccum queries | yes | +| #3551 | Using java-agent gradle plugin to phase off Security Manager in favor of Java-agent. | no | +| #3442 | Fix issue 2489 | yes | +| #3649 | create a new directory org/opensearch/direct-query/ | no | +| #3671 | [Fix CVE-2025-47273]Bump setuptools to 78.1.1 | yes | +| #3604 | Implement geoip udf with Calcite (need for geo ip) | ? | +| #3660 | Fix: Long IN-lists causes crash | yes | +| #3715 | Modified workflow: Grammar Files & Async Query Core | ? | +| #3588 | [AUTO] Increment version to 3.1.0-SNAPSHOT | no | +| #3589 | Remove beta1 qualifier | no | +| #3592 | Add release notes on version 3.0.0.0 | no | + + +### @LantaoJin + + +@penghuo please double confirm the "?" item in https://github.com/opensearch-project/sql/pull/3752#issuecomment-2964965999 and merge it. + + +### @penghuo + + +> @penghuo please double confirm the "?" item in [#3752 (comment)](https://github.com/opensearch-project/sql/pull/3752#issuecomment-2964965999) and merge it. + +https://github.com/opensearch-project/sql/pull/3715, not required. + + +### @xinyual + + +@qianheng-aws Please review it. + + +### @penghuo + + +@noCharger Do we need to backport async-query-core to 2.19-dev? + +2 related PRs +* https://github.com/opensearch-project/sql/pull/3304 +* https://github.com/opensearch-project/sql/pull/2891 + + +### @noCharger + + +> @noCharger Do we need to backport async-query-core to 2.19-dev? 
+> +> 2 related PRs +> +> * [Add other functions to SQL query validator #3304](https://github.com/opensearch-project/sql/pull/3304) +> * [Remove dependency from async-query-core to datasources #2891](https://github.com/opensearch-project/sql/pull/2891) + +It's good to have them on 2.19 to benefit people using 2.19 cluster. Not must-have though. + + +--- + +# PR #3748: Fix: correct ATAN(x, y) and CONV(x, a, b) functions bug + +**URL:** https://github.com/opensearch-project/sql/pull/3748 + +**Author:** @ishaoxy + +**Created:** 2025-06-09T06:54:14Z + +**State:** MERGED + +**Merged:** 2025-06-11T00:48:16Z + +**Changes:** +6 -15 (3 files) + +**Labels:** `PPL`, `calcite` + + +## Description + +### Description +Solved the bug that **ATAN(x, y)** function does not support two parameters in v3 version. Also fixed a bug in the **CONV(x, a, b)** function where incorrect type conversion caused unexpected results. + + +### Related Issues +Resolves #3672 + + + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - APPROVED + + +Thanks for the fixing. LGTM. + + +## Review Comments + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:535` + + +Could you elaborate why we register ATAN2 as ATAN? I see both Calcite and SparkSQL has ATAN and ATAN2 separately: https://calcite.apache.org/docs/reference.html, https://spark.apache.org/docs/latest/api/sql/#atan + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:535` + + +ATAN can accept 2 arguments in v2 which is not existing in standard SQL, this PR is to align with v2 to avoid breaking changes. 
If you think it's unnecessary, we can delete it in v2 and comment ATAN will 2 arguments will not accept since 3.1.0. + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java:535` + + +I see. Sure, we can go ahead if we follow aligning with V2 criteria for all functions. Just feel this may cause unnecessary work in both Calcite and Spark. We can discuss later. Thanks! + + +## General Comments + + +### @LantaoJin + + +@ishaoxy Remove these rows https://github.com/opensearch-project/sql/blob/270aa0dc3349e3a3c3b4e8e0e19c772f81c17781/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMathematicalFunctionIT.java#L20-L29 +to enable the tests with your patch. + + +--- + +# PR #3747: Support `flatten` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3747 + +**Author:** @yuancu + +**Created:** 2025-06-08T11:26:57Z + +**State:** MERGED + +**Merged:** 2025-06-11T00:37:42Z + +**Changes:** +705 -1 (11 files) + +**Labels:** `PPL`, `calcite` + + +## Description + +### Description +Implement `flatten` command that flattens a nested struct / object field into separate fields in a document. + +Flatten extracts the fields in a struct into separate other fields in the document: + +E.g. For +``` +{ + "name": "Jack", + "address": { + "state": "Oregon", + "city": "Portland" + } +} +``` +Flattening `address` will result in +``` +{ + "name": "Jack", + "address": { + "state": "Oregon", + "city": "Portland" + }, + "state": "Oregon", + "city": "Portland" +} +``` + +### Related Issues +Resolves #3712 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @LantaoJin - DISMISSED + + +LGTM, waiting CI pass + + +### @qianheng-aws - DISMISSED + + +Need test for this case: + +``` +source = index | fields message| flatten message +``` + + +## Review Comments + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFlattenTest.java:None` + + +The logical plans of `flatten` is suspicious. + +The logical plan right after `visitFlatten` in `CalciteRelNodeVisitor` is: +``` +LogicalProject(DEPTNO=[$0], EMP=[$1], EMP.EMPNO=[$2], EMP.EMPNAME=[$3], EMPNO=[$2], EMPNAME=[$3]) + LogicalTableScan(table=[[scott, DEPT]]) +``` + +But it is later transformed to a simple logical table scan. + + +### @LantaoJin on `docs/user/ppl/cmd/flatten.rst:106` + + +could you add a limitation section to describe that it only works when calcite enabled. + + +### @LantaoJin on `docs/user/ppl/cmd/flatten.rst:31` + + +missing version section before syntax +``` +Version +======= +3.1.0 +``` + + +### @LantaoJin on `docs/user/ppl/cmd/appendcol.rst:None` + + +I think we can remove these highlights, we already have a Version section after Description + + +### @LantaoJin on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFlattenTest.java:None` + + +seem `relBuilder.projectPlus(expandedFields)` doesn't work in this case, please check the result by adding `verifyResult(root, expectedResult);` + + +### @yuancu on `ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFlattenTest.java:None` + + +Fixed the problem and corrected unit tests. + + +### @yuancu on `docs/user/ppl/cmd/flatten.rst:106` + + +Added. I think this means the same as "Since 3.1.0"? Maybe we could make it explicit somewhere that these features tagged as *since 3.1.0* only works with calcite enabled. + + +### @yuancu on `docs/user/ppl/cmd/flatten.rst:31` + + +Added + + +### @LantaoJin on `docs/user/ppl/cmd/flatten.rst:106` + + +3.1.0 != calcite enabled + + +### @yuancu on `docs/user/ppl/cmd/appendcol.rst:None` + + +Yep. Removed. 
+ + +### @yuancu on `docs/user/ppl/cmd/flatten.rst:106` + + +Indeed, I also see 3.0.0. Thanks for explanation. + + +### @LantaoJin on `docs/user/ppl/cmd/ad.rst:None` + + +why made these lots of code changes? I am afraid it may easy to introduce PR conflicts. + + +### @yuancu on `docs/user/ppl/cmd/ad.rst:None` + + +Since I already removed tags like the following from these docs, I find it may be appropriate to correct / unify some language use in these docs alongside. After all, the docs are rarely updated once the corresponding PRs of their functionality are merged. + +``` + (Experimental) + (From 3.1.0) +``` + + +### @LantaoJin on `docs/user/ppl/cmd/ad.rst:None` + + +Still confuse me what is the relationship between removing these two lines and changes from other doc files. + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +do not add `Since` + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +just remove these two lines, do not change rests + + +### @yuancu on `docs/user/ppl/cmd/join.rst:None` + + +Without since, it sounds like the command is at its 3.0.0 version (indicating e.g. it might not be compatible with 4.0.0). I think using since makes it less confusing and is a common practice. + +I'll revert it in this PR. We may raise another issue to fix this in the future. + + +### @LantaoJin on `docs/user/ppl/cmd/join.rst:None` + + +No, this version contains `from` meaning. it is the min version to support. if one command/function retired from 4.0.0, we can just remove it from doc. the doc is a historical doc. if you are a 4.0.0 user, read the related doc from 4.0.0 tag. else, if you are a 2.15 user, read the 2.15 doc + + +### @penghuo on `docs/user/ppl/cmd/flatten.rst:63` + + +`message` is Json Object, why index as nested field. Usually, [Json Array is indexed as nested field](https://docs.opensearch.org/docs/latest/field-types/supported-field-types/nested/). 
+ + +### @penghuo on `docs/user/ppl/cmd/flatten.rst:None` + + +> If the field is an nested array of structs, + +Why work on Array of structs if flatten does not work on Array? + + +### @penghuo on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1020` + + +Could u explain flattened manner in OpenSearch? what is expected result from OpenSearch + + +### @yuancu on `docs/user/ppl/cmd/flatten.rst:63` + + +Yes, using object here is more clear. Fixed. + +However, the boundary between array and object is blurry in OpenSearch. Both object and array fields can store a struct or an array of structs. Both nested and object works in this case. + + +### @yuancu on `docs/user/ppl/cmd/flatten.rst:None` + + +It's because although not expected, an object/nested field can store arrays. I have updated the doc to make it easier to understand. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1020` + + +Flatten extracts the fields in a struct into separate other fields in the document: + +E.g. For +``` +{"name": "Jack", "address": {"state": "Oregon", "city": "Portland"}} +``` +Flattening `address` will result in +``` +{"name": "Jack", "address": {"state": "Oregon", "city": "Portland"}, "state": "Oregon", "city": "Portland"} +``` + +(Also copied to PR description) + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1027` + + +Seems the implementation relies on the flattened nested fields in the RelRowType, but it may not work for PPL: + +``` +source = index | fields message| flatten message +``` + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1027` + + +It fails this test case indeed. Trying to fix it. + + +### @LantaoJin on `docs/user/ppl/cmd/flatten.rst:None` + + +Change to: + +`flatten` command may not work as expected when its flattened fields are invisible. 
+For example in query `source=my-index | fields message | flatten message`, +the `flatten message` command doesn't work since some flattened fields such as `message.info` and `message.author` after command `fields message` are invisible. +As an alternative, you can change to `source=my-index | flatten message` + + + +### @yuancu on `docs/user/ppl/cmd/flatten.rst:None` + + +Done. + + +## General Comments + + +### @yuancu + + +> Need test for this case: +> +> ``` +> source = index | fields message| flatten message +> ``` + +We need complete support for typed arrays and structs in order to make it work in such scenarios. + +I added an ignored test case and left a TODO item, saying that we'll correct it after fixing #3459 and #3751 + + +--- + +# PR #3745: Support `expand` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3745 + +**Author:** @yuancu + +**Created:** 2025-06-06T08:25:38Z + +**State:** MERGED + +**Merged:** 2025-06-10T05:34:27Z + +**Changes:** +770 -3 (18 files) + +**Labels:** `PPL`, `calcite` + + +## Description + +### Description +Support `expand` command with Calcite. + +Note: `expand` only explodes nested arrays, not maps. + +### Related Issues +Resolves #3711 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +Please add UT `CalcitePPLExpandTest` in folder ppl/src/test/java/org/opensearch/sql/ppl/calcite + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java:None` + + +revert this please, you can set in IDE to prevent auto squash. 
+ + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/tree/Expand.java:26` + + +Does `@Getter` of L18 need to remove? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/ast/tree/Expand.java:22` + + + hashcode annotation is missing + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:201` + + +why static? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1125` + + +what if ppl `| eval a = col | expand a`? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +`9.` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java:None` + + +why we still need this class? LogicalXX is only used in v2 + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java:None` + + +remove it + + +### @LantaoJin on `docs/user/ppl/cmd/expand.rst:None` + + +Please follow the command document style. Ref docs/user/ppl/cmd/eventstats.rst + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java:None` + + +We can wait user's feedback in future. I don't either think we can support it now. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java:None` + + +Check data instead of row's amount. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:250` + + +Add a comment here: using `UNDEFINED` instead of `UNKNOWN` to avoid throwing exception. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:201` + + +Because it does not depend on any instance method / field. `tryToRemoveMetaFields` is also declared as a static method. 
+ + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1125` + + +It will still be able to expand the column since `a` is already one of the projected fields after `eval`. Added a test case for this. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java:None` + + +Reverted. Thanks for reminding. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/tree/Expand.java:26` + + +Yes, removed. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/ast/tree/Expand.java:22` + + +Fixed + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:None` + + +Fixed + + +### @yuancu on `core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java:None` + + +Removed + + +### @yuancu on `docs/user/ppl/cmd/expand.rst:None` + + +Fixed the style. + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java:None` + + +Fixed + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:250` + + +Comment added + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java:None` + + +`arries` typo? + + +### @LantaoJin on `docs/user/ppl/cmd/expand.rst:None` + + +`mirgration` typo? + + +### @LantaoJin on `docs/user/ppl/cmd/expand.rst:None` + + +better to use lower case `as` in syntax + + +### @yuancu on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java:None` + + +yep, fixed + + +### @yuancu on `docs/user/ppl/cmd/expand.rst:None` + + +yes 🫠 + + +### @yuancu on `docs/user/ppl/cmd/expand.rst:None` + + +fixed + + +### @LantaoJin on `docs/user/ppl/cmd/expand.rst:33` + + +missing version section before syntax + + +### @penghuo on `docs/user/ppl/cmd/expand.rst:16` + + +nested array -> array? why emphasized nested? 
+ + +## General Comments + + +### @yuancu + + +TODOs: +- [x] Test expanding empty arrays +- [ ] Throw errors when expanding a non-array field (e.g. strings stored in a string field) +- [x] Add unit test (challenging since nested arrays are not in place in Calcite's test data) +- [ ] Update docs (following that of `eventstats`) + + +### @yuancu + + +Just to confirm: when the field to expand is an empty array, should I keep it in the result? @LantaoJin @qianheng-aws @penghuo + +The current implementation keeps it, with the corresponding expanded value set to `null`. (This corresponds to left join.) + +E.g. for +``` +{"company": "OldEdge", "employees": ["Jack", "Ben"]} +{"company": "NewEdge", "employees": []} +``` +expanding employees results in +``` +{"company": "OldEdge", "employees": "Jack"} +{"company": "OldEdge", "employees": "Ben"} +{"company": "NewEdge", "employees": null} +``` + +> *although technically expanding string arrays is not supported yet* + + +--- + +# PR #3741: Support trendline command in Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3741 + +**Author:** @songkant-aws + +**Created:** 2025-06-05T09:15:32Z + +**State:** MERGED + +**Merged:** 2025-06-10T02:02:56Z + +**Changes:** +448 -42 (14 files) + +**Labels:** `PPL`, `calcite` + + +## Description + +### Description +Support trendline command in Calcite + +### Related Issues +Resolves #3466 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +Will `wma` work in v2? Please add wma related documents in trendline.rst + + +### @LantaoJin - DISMISSED + + +LGTM once typo is fixed. 
+ + +### @LantaoJin - DISMISSED + + +LGTM, waiting CI pass + + +## Review Comments + + +### @LantaoJin on `docs/user/ppl/cmd/trendline.rst:None` + + +Change to: +Starting with version 3.1.0, the ``trendline`` command requires all values in the specified ``field`` to be non-null. Any null values present in the calculation field will be automatically excluded from the command's output. + + +### @penghuo on `ppl/src/main/antlr/OpenSearchPPLParser.g4:239` + + +update doc to explain new algorithm? + + +### @songkant-aws on `docs/user/ppl/cmd/trendline.rst:None` + + +Done + + +### @dai-chen on `docs/user/ppl/cmd/trendline.rst:None` + + +np: `amd` typo? + + +### @dai-chen on `docs/user/ppl/cmd/trendline.rst:18` + + +Should this command later enforce sorting either on an implicit timestamp field? + + +### @LantaoJin on `docs/user/ppl/cmd/trendline.rst:18` + + +> Should this command later enforce sorting either on an implicit timestamp field? + +We could wait for more user feedbacks. Seems more than one command could be affected with an implicit timestamp field. + + +### @songkant-aws on `docs/user/ppl/cmd/trendline.rst:18` + + +Yeah, it could depend on customer feature request. + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLTrendlineIT.java:19` + + +missing CalcitePPLTrendlinePushdownIT if we add IT in standalone + + +### @songkant-aws on `docs/user/ppl/cmd/trendline.rst:None` + + +Fixed + + +### @songkant-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLTrendlineIT.java:19` + + +It's added now. + + +## General Comments + + +### @LantaoJin + + +CI failures CalcitePPLAppendcolTest > ... FAILED is not related to this fixing. + + +### @songkant-aws + + +@LantaoJin @penghuo Updated the doc with new wma algorithm and callout it's supported with Calcite enabled. + + +### @songkant-aws + + +@LantaoJin @dai-chen Fixed typo and make some doc phrasing clearer. 
+ + +--- + +# PR #3738: Support ResourceMonitor with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3738 + +**Author:** @LantaoJin + +**Created:** 2025-06-05T06:21:21Z + +**State:** MERGED + +**Merged:** 2025-06-05T21:44:05Z + +**Changes:** +54 -11 (8 files) + +**Labels:** `enhancement`, `calcite` + + +## Description + +### Description +- Support ResourceMonitor with Calcite + - Currently, the `CalciteEnumerableIndexScan` is the only physical operator we implemented. The rest are all default implementations of Calcite. Resource monitor can only be added in scan operator. But for memory checker, it's fine. +- Fix the failed UTs related to #3673 + +### Related Issues +Resolves #3454 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +I think the non-intrusive decorator approach in V2 may be better for safeguarding additional operators in the future and also give Spark the flexibility to bypass it if needed—especially when unifying the physical index scan operator in the PPL-Spark integration. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +> I think the non-intrusive decorator approach in V2 may be better for safeguarding additional operators in the future and also give Spark the flexibility to bypass it if needed—especially when unifying the physical index scan operator in the PPL-Spark integration. + +Agree, we can refactor it later by adding a wrapper physical operator as a decorator. It requires +1. adding user-defined Calcite logical plan (related to https://github.com/opensearch-project/sql/issues/3329) +2. 
corresponding physical operator generator rule and decorator physical operator. + + +--- + +# PR #3736: Add Commit-Based Artifact Download Support + +**URL:** https://github.com/opensearch-project/sql/pull/3736 + +**Author:** @ahkcs + +**Created:** 2025-06-04T22:45:44Z + +**State:** MERGED + +**Merged:** 2025-06-20T21:50:51Z + +**Changes:** +581 -354 (4 files) + +**Labels:** `infrastructure` + + +## Description + +### Description + +This PR modified workflow to support commit-based downloads using a commit-version-map.json file. This allows users to download specific artifact versions that correspond to a particular Git commit without needing to know the precise version number. + +```Problem Addressed``` + +Previously, our download scripts required specifying an exact version (e.g., 0.1.0-SNAPSHOT), which made it difficult to retrieve artifacts that were built from a specific commit. + +Added feature to only change code when corresponding code changes + + +```Solution``` + +We've implemented a commit-to-version mapping system that: + +Creates and maintains a mapping file (commit-version-map.json) during snapshot publishing +Enables artifact download scripts to look up specific versions by commit ID +Works for both language-grammar and async-query-core artifacts + +```Key Changes``` + +In the Publishing Workflow + +Added code to generate/update the commit-history-async-query-core.json and commit-history-language-grammar.json file +https://aws.oss.sonatype.org/content/repositories/snapshots/org/opensearch/async-query-core/commit-history-async-query-core.json +https://aws.oss.sonatype.org/content/repositories/snapshots/org/opensearch/language-grammar/commit-history-language-grammar.json +Captures detailed version information including both base version and timestamped artifact version +Added code to prevent concurrent commits + + + +## Reviews + + +### @noCharger - COMMENTED + + +Are we creating a new file instead of reusing maven-metadata.xml? 
+ + +### @Swiddis - COMMENTED + + +What happens if two jobs run at once and pull the previous mapping at the same time? + +Ideally, we should include the commit ID as part of the artifact key itself, and then in the script pull by the commit directly without needing to parse a separate mapping. + + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +> Are we creating a new file instead of reusing maven-metadata.xml? + +Yes, we are creating a new file commit-history json to manage commit-version mapping + + +### @noCharger + + +> What happens if two jobs run at once and pull the previous mapping at the same time? +> +> Ideally, we should include the commit ID as part of the artifact key itself, and then in the script pull by the commit directly without needing to parse a separate mapping. + ++1 on this. + + +--- + +# PR #3735: fix mapping format + +**URL:** https://github.com/opensearch-project/sql/pull/3735 + +**Author:** @ahkcs + +**Created:** 2025-06-04T21:16:25Z + +**State:** MERGED + +**Merged:** 2025-06-04T21:16:45Z + +**Changes:** +22 -20 (1 files) + + +## Description + +fix mapping format + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3733: generate jars based on commit_id + +**URL:** https://github.com/opensearch-project/sql/pull/3733 + +**Author:** @ahkcs + +**Created:** 2025-06-04T21:00:48Z + +**State:** MERGED + +**Merged:** 2025-06-04T21:03:11Z + +**Changes:** +180 -5 (3 files) + + +## Description + +generate jars based on commit_id + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3732: Add debugger details to dev guide + +**URL:** https://github.com/opensearch-project/sql/pull/3732 + 
+**Author:** @Swiddis + +**Created:** 2025-06-04T16:45:19Z + +**State:** MERGED + +**Merged:** 2025-06-23T20:11:38Z + +**Changes:** +10 -0 (1 files) + +**Labels:** `documentation`, `maintenance` + + +## Description + +### Description +Just some info that I wish I had when setting up IntelliJ debugging. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `DEVELOPER_GUIDE.rst:None` + + +``` +./gradlew opensearch-sql:run -DdebugJVM +``` + + +## General Comments + + +### @penghuo + + +@LantaoJin Please take another look. + + +### @noCharger + + +This is the same as how developers debug the OpenSearch core + + +--- + +# PR #3730: [Backport 2.x] Modified workflow: Grammar Files & Async Query Core + +**URL:** https://github.com/opensearch-project/sql/pull/3730 + +**Author:** @ahkcs + +**Created:** 2025-06-03T21:25:59Z + +**State:** MERGED + +**Merged:** 2025-06-09T18:03:20Z + +**Changes:** +7229 -7 (16 files) + + +## Description + +Description +Publish to Maven: Grammar Files & Async Query Core +This PR adds a GitHub Actions workflow to automate the publication of artifacts to the Maven snapshot repository for the language-grammar package and the async-query-core module. + +Key Features + +Grammar File Publishing (language-grammar) +Collects .g4 ANTLR grammar files from the language-grammar module. +Packages them into a .zip artifact with a generated POM. + +Publishes the zip artifact to Maven snapshot repo under org.opensearch:language-grammar. + +Injects the current Git commit SHA into maven-metadata.xml as for traceability. 
+ +Async Query Core Publishing (async-query-core) +Builds a shaded JAR (shadowJar) of the async-query-core module using Gradle. +Prepares and publishes the JAR and generated POM to Maven snapshot repo under org.opensearch:async-query-core. + +Similarly injects the current Git commit SHA into version metadata. + + + +## Reviews + + +### @noCharger - APPROVED + + +Can you double check [Link Checker / linkchecker (pull_request)](https://github.com/opensearch-project/sql/actions/runs/15428346538/job/43420707182?pr=3730)Failing after 8s + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3729: Support `appendcol` command with Calcite + +**URL:** https://github.com/opensearch-project/sql/pull/3729 + +**Author:** @LantaoJin + +**Created:** 2025-06-03T09:22:03Z + +**State:** MERGED + +**Merged:** 2025-06-04T23:06:24Z + +**Changes:** +759 -26 (39 files) + +**Labels:** `PPL`, `feature`, `calcite` + + +## Description + +### Description +Support `appendcol` command with Calcite + +### Related Issues +Resolves #3172 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the changes! + +I wonder in the long term whether `appendcols` should be a new relational operator that extends the SQL standard? Semantically, it's closer to a vertical version of `UNION ALL`, aligning rows by position rather than key. While a `JOIN` using `ROW_NUMBER()` can reproduce the behavior, does it introduces unnecessary overhead? 
+ + +## Review Comments + + +### @penghuo on `docs/user/ppl/cmd/appendcol.rst:86` + + +query result is same as override = false?, we should demo the usage of override? + + +### @penghuo on `docs/user/ppl/cmd/appendcol.rst:26` + + +in case appendcol is implemented as full join, we should consider enforce limitations? + + +### @LantaoJin on `docs/user/ppl/cmd/appendcol.rst:26` + + +I will create a new issue about operator limitation for all high cost operators. + + +### @LantaoJin on `docs/user/ppl/cmd/appendcol.rst:86` + + +The result is different with override=false. check the (second row, first column) + + +## General Comments + + +### @LantaoJin + + +> Thanks for the changes! +> +> I wonder in the long term whether `appendcols` should be a new relational operator that extends the SQL standard? Semantically, it's closer to a vertical version of `UNION ALL`, aligning rows by position rather than key. While a `JOIN` using `ROW_NUMBER()` can reproduce the behavior, does it introduces unnecessary overhead? + +AFAIK, only `lateral` join (ANSI 2003) is close to a vertical version of `UNION ALL`. But for the case of aligning one-by-one rows by position, `lateral` join is not a proper solution. + + +--- + +# PR #3726: Add splunk to ppl cheat sheet + +**URL:** https://github.com/opensearch-project/sql/pull/3726 + +**Author:** @penghuo + +**Created:** 2025-06-02T22:00:35Z + +**State:** MERGED + +**Merged:** 2025-09-18T19:52:25Z + +**Changes:** +240 -0 (1 files) + +**Labels:** `documentation`, `PPL`, `v3.3.0` + + +## Description + +### Description +Add [Splunk to PPL cheat sheet doc](https://github.com/penghuo/os-sql/blob/dcca680b0980f28406987fe6fb49a51e2c185889/docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md) + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Maybe we can reference this in other doc so users can find it easily. + + +### @RyanL1997 - COMMENTED + + +Hi @penghuo , thanks for the change. I just checked the parts for `rex` and `regex` and left a comment. + + +### @vamsimanohar - APPROVED + + +LGTM + + +## Review Comments + + +### @penghuo on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:135` + + +bin and stats commands does not work as expected. https://github.com/opensearch-project/sql/issues/4322 + + +### @RyanL1997 on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:None` + + +I think we are supporting `mode=sed` in PPL rex according to the implementation of https://github.com/opensearch-project/sql/pull/4241. + +Instead, I think one of the diffs I can think about it is the `max_match`: Unlike SPL, instead of unlimited multiple match patterns when `max_match = 0`, PPL will have a pre-configured default to avoid memory issue and optimize the performance. + +Reference: +> * max_match: optional integer (default=1). Maximum number of matches to extract. If greater than 1, extracted fields become arrays. The value 0 means unlimited matches, but is automatically capped to the configured limit (default: 10, configurable via ``plugins.ppl.rex.max_match.limit``). + +https://github.com/opensearch-project/sql/blob/cce492d7e9bb44d79f2272f77963404e1dc7d5a4/docs/user/ppl/cmd/rex.rst?plain=1#L37C1-L38C1 + + + + + +### @anasalkouz on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:None` + + +I think we support both formats for Percentiles. @aalva500-prog can you please confrim? + + +### @aalva500-prog on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:None` + + +Yes, we support both formats. 
We also support the shortcut `p95(field)`. + + +### @penghuo on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:None` + + +done. https://github.com/opensearch-project/sql/pull/3726/commits/3ed50472cb7de39e0005a660a7869949e6534755 + + +### @penghuo on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:None` + + +done. https://github.com/opensearch-project/sql/pull/3726/commits/3ed50472cb7de39e0005a660a7869949e6534755 + + +### @dai-chen on `docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md:188` + + +Do we also support + ? https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/eval.rst#example-4-string-concatenation-with-operator-need-to-enable-calcite + + +## General Comments + + +### @ciacciop + + +Quick question. For lines 40 & 41, shouldn't the PPL equivalent match row 39 and include the single quotes ? +- i.e. - field in ('404', '503' ) + +If field was meant to be generic from line to line, then may I suggest using some type based nomenclature in the names to help make it more clear? +- i.e. - strField / intField / dblField + + +### @anasalkouz + + +@penghuo why this PR still on draft? Already has 2 approvals, shall we move forward? 
+ + +--- + +# PR #3724: revision + +**URL:** https://github.com/opensearch-project/sql/pull/3724 + +**Author:** @ahkcs + +**Created:** 2025-06-02T19:14:00Z + +**State:** MERGED + +**Merged:** 2025-06-02T19:14:49Z + +**Changes:** +2 -9 (2 files) + + +## Description + +revision + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #3723: revision + +**URL:** https://github.com/opensearch-project/sql/pull/3723 + +**Author:** @ahkcs + +**Created:** 2025-06-02T19:06:36Z + +**State:** MERGED + +**Merged:** 2025-06-02T19:07:33Z + +**Changes:** +15 -5 (4 files) + + +## Description + +revision + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + diff --git a/.kiro/resources/sql/2025-12-01-TO-2025-12-31.md b/.kiro/resources/sql/2025-12-01-TO-2025-12-31.md new file mode 100644 index 0000000000..7562497616 --- /dev/null +++ b/.kiro/resources/sql/2025-12-01-TO-2025-12-31.md @@ -0,0 +1,4940 @@ +# PR Review Data: opensearch-project/sql +**Date Range:** 2025-12-01 to 2025-12-31 +**Total PRs:** 66 +**Generated:** 2026-01-21T14:22:45.308730 + +--- + +# PR #5009: Fix PIT context leak in Legacy SQL for non-paginated queries + +**URL:** https://github.com/opensearch-project/sql/pull/5009 + +**Author:** @aalva500-prog + +**Created:** 2025-12-31T23:19:12Z + +**State:** MERGED + +**Merged:** 2026-01-08T22:59:29Z + +**Changes:** +387 -50 (4 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `backport 3.1`, `bugFix` + + +## Description + +### Description +This PR fixes Point-in-Time (PIT) context leak in the Legacy SQL engine when executing queries without `fetch_size` parameter. 
+ +**Problem:** +The Legacy SQL engine was creating PIT contexts for ALL queries but only cleaning them up when cursors were created (paginated queries with `fetch_size > 0`). Non-paginated queries leaked PITs, causing accumulation until the 300 PIT limit was exhausted and queries to fail. + +**Solution:** +- Only create PIT when `fetch_size > 0` and not `null` (pagination requested) + +**Impact:** +- Non-paginated queries no longer leak PIT contexts +- Paginated queries continue to work correctly with cursor-based PIT management + +### Related Issues +Resolves #5002 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - DISMISSED + + +Thanks for the fix! + + +## Review Comments + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:100` + + +Wrong suggestion, there is no compilation error as the executeCursorCloseQuery method is not missing - it's already implemented in the parent class SQLIntegTestCase and is being correctly inherited by PointInTimeLeakIT. + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Will take care of it + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Will take care of it + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +issue (non-blocking): Why do we need this catch block? The finally block does the same thing? 
+ + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +issue (readability): It's not obvious how this logic avoids deleting the cursor for paginated queries. + +`cursorCreated` could stand to be more clearly named, and all the cursor config could be split out to a smaller method to make it easier to follow this branching. `isDefaultCursor` isn't a very descriptive name either + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +I'll remove the catch block and leave the finally block, thanks! + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Sure, will simplify the implementation and make it more readable, thanks! + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:None` + + +Please remove all sysout + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Could you separate the logic in 2 methods instead of flow control by this flag? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:99` + + +What is this used for? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Not necessary because finally block will executed anyway? + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Yeah, I'm fixing it, thanks! + + +### @vamsimanohar on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:102` + + +can you explain the logic here in words? What does fetchsize mean here? 
+ + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Sure, I'm working on it, thanks! + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:102` + + +fetchsize is the size of the fetch_size parameter of the query, I'm only creating a PIT if the user specifies the fetch_size parameter and it is greater than zero. Before this, the PIT was always created regardless the user providing fetchsize in the query or not. My logic is to avoid creating PIT if `fetchSize` is 0 or not specified, meaning no pagination is needed and no PIT is created. Hope that makes sense. + + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:99` + + +It was originally in the code, will take a further look and update here, thanks! + +__Update:__ Ran the integration tests without this line and got failures in `CursorIT`, `PrettyFormatResponseIT`, `SQLCorrectnessIT`, `PaginationFallbackIT`, and `PaginationIT`. + +According to my investigation, the call to `queryAction.explain()` is necessary because it builds the OpenSearch `SearchRequestBuilder` before executing the query. Without it, the `request` field in `DefaultQueryAction` remains null, which causes failures when `queryAction.getRequestBuilder()` is called later. + +The original code was: + +```java +SqlOpenSearchRequestBuilder sqlOpenSearchRequestBuilder = queryAction.explain(); +``` + +Since the returned variable is never used, I simplified it to: + +```java +queryAction.explain(); +``` + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:None` + + +np: I recall org.json supports JSON path. Could you check `query()` or `optQuery()` API? + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java:None` + + +Thanks for the refactor. 
It looks much cleaner! + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/legacy/PointInTimeLeakIT.java:None` + + +Sure, I'll modify this method accordingly, thanks! + + +## General Comments + + +### @vamsimanohar + + +Have you also checked in other scenarios if the PIT contexts are closed properly + + +### @aalva500-prog + + +> Have you also checked in other scenarios if the PIT contexts are closed properly + +@vamsimanohar The PIT context leak only happens in Legacy engine when the index name has special characters like `test-logs-2025.01.01`, it is not wrapped in backticks, and the fetch_size is not provided. It doesn't happen when the index name is like `test-logs-2025-01-01`, as it never falls to Legacy engine and it is handled by V2/V3. The IT class `PointInTimeLeakIT` should cover such scenarios, specifically this test case `testCompareV1AndV2EnginePitBehavior` compares V1 vs V2 by using backticks around the index so that the query is handled by V2 only. Note: I did manual testing in my own local cluster and AOS domain to confirm the same. + + +--- + +# PR #5007: [Backport 2.19-dev] adding capability in SQLQueryUtils to identify if SQL query is for creating a table or not + +**URL:** https://github.com/opensearch-project/sql/pull/5007 + +**Author:** @Parasjg + +**Created:** 2025-12-30T07:45:44Z + +**State:** MERGED + +**Merged:** 2026-01-07T02:21:32Z + +**Changes:** +100 -7 (2 files) + + +## Description + +BackPorting Already Merged PR (https://github.com/opensearch-project/sql/pull/4029) - https://github.com/opensearch-project/sql/commit/5cb51814ed1bb527c47a5e827c773a1899c081e9 +from main to 2.19-dev +### Description +Adding capability in SQLQueryUtils to identify if SQL query is for creating a table or not. + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + +### Check List +- [NA] New functionality has been documented. + - [NA] New functionality has javadoc added. 
+ - [NA] New functionality has a user manual doc added. +- [NA] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) all confirmed. +- [NA] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [NA] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5003: [Backport 2.19-dev] Support enumerable TopK + +**URL:** https://github.com/opensearch-project/sql/pull/5003 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-29T07:08:42Z + +**State:** MERGED + +**Merged:** 2025-12-29T09:32:07Z + +**Changes:** +334 -217 (39 files) + + +## Description + +Backport 08be6f92fecdf6ea14cbcb38762677e2dcfcf85d from #4993. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #5000: [Backport 2.19-dev] Prune old in operator push down rules (#4992) + +**URL:** https://github.com/opensearch-project/sql/pull/5000 + +**Author:** @qianheng-aws + +**Created:** 2025-12-26T09:41:39Z + +**State:** MERGED + +**Merged:** 2025-12-29T06:24:06Z + +**Changes:** +471 -385 (110 files) + + +## Description + +(cherry picked from https://github.com/opensearch-project/sql/pull/4992 commit https://github.com/opensearch-project/sql/commit/0ecb0a95b252401474d78fb45abafa30cb31453d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4997: Apply feedback from documentation-website to PPL command docs + +**URL:** https://github.com/opensearch-project/sql/pull/4997 + +**Author:** @ritvibhatt + +**Created:** 2025-12-24T23:43:31Z + +**State:** MERGED + +**Merged:** 2026-01-09T17:29:20Z + +**Changes:** +4354 -3422 (60 files) + +**Labels:** `documentation`, `enhancement` + + +## Description + +### Description +- Apply feedback from technical writers from documentation-website PR (https://github.com/opensearch-project/documentation-website/pull/11688) to SQL repo to keep documentation in sync +- Update script to auto-convert codeblock tables to markdown tables, support single directory export, remove empty tables, improve Jekyll angle bracket/asterisk escaping, convert markdown emphasis to Jekyll attributes +- Add README with SOP for docs exporter script + +When the docs exporter script is applied now, the differences from the docs website are: links to sections not yet ported to documentation-website are commented out in the docs website (functions, admin), whitespace differences, different nav order for some command 
files, and some differences in escape characters for angle brackets and asterisks. Everything except the commented out admin links will be addressed in the next documentation-website PR (https://github.com/opensearch-project/documentation-website/pull/11747). + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @kylehounslow + + +Thanks @ritvibhatt! Does this PR enable zero diff when re-exporting back to `documentation-website`? Or are there manual changes that need to happen? + + +### @ritvibhatt + + +> When the docs exporter script is applied now, the differences from the docs website are: links to sections not yet ported to documentation-website are commented out in the docs website (functions, admin), whitespace differences, different nav order for some command files, and some differences in escape characters for angle brackets and asterisks. Everything except the commented out admin links will be addressed in the next documentation-website PR ([opensearch-project/documentation-website#11747](https://github.com/opensearch-project/documentation-website/pull/11747)). 
+ +@kylehounslow It still has a few differences right now: +- links to sections not yet ported to documentation-website are commented out in the docs website (functions, admin) +- some whitespace differences and different nav order for some command files (looks like documentation website skips some numbers for nav order) +- some differences with adding escape characters for angle brackets and asterisks (causing some problems with examples rendering in documentation website) +Raised another PR for the documentation website that should address everything except the commented out admin links: https://github.com/opensearch-project/documentation-website/pull/11747. + + + +--- + +# PR #4996: Sync up this path publish-async-query-core.yml from main to 2.19-dev and also changing the JAVA to 17 + +**URL:** https://github.com/opensearch-project/sql/pull/4996 + +**Author:** @Parasjg + +**Created:** 2025-12-24T10:13:26Z + +**State:** MERGED + +**Merged:** 2026-01-06T17:46:57Z + +**Changes:** +17 -7 (2 files) + + +## Description + +Sync up this path publish-async-query-core.yml from main to 2.19-dev and also changing the JAVA to 17 + +### Description +Sync up this path publish-async-query-core.yml from main to 2.19-dev and also changing the JAVA to 17 + +### Related Issues +Resolving issue related to java version when updating the async-query-core jars in DQS + +### Check List +- [NA] New functionality includes testing. +- [NA] New functionality has been documented. + - [NA] New functionality has javadoc added. + - [NA] New functionality has a user manual doc added. +- [NA] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) all confirmed. +- [NA] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [NA] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). 
+ +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @ahkcs - APPROVED + + +LGTM + + +## Review Comments + + +### @LantaoJin on `.github/workflows/publish-async-query-core.yml:38` + + +shouldn't be 11 for 2.19? + + +### @Parasjg on `.github/workflows/publish-async-query-core.yml:38` + + +We connected with Kai Huang , he suggested 17 could work . Do let me know if we can't go ahead with this one. + + +### @ahkcs on `.github/workflows/publish-async-query-core.yml:38` + + +We want to use JDK 17 for async query core jar here and main branch doesn't support 17, that's why I suggested them to backport the maven snapshot upload logic to 2.19-dev branch with JDK 17 supported, please let me know if there's any concerns for this + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4995: [Backport 2.19-dev] Dedup pushdown (TopHits Agg) should work with Object fields (#4991) + +**URL:** https://github.com/opensearch-project/sql/pull/4995 + +**Author:** @LantaoJin + +**Created:** 2025-12-24T09:07:32Z + +**State:** MERGED + +**Merged:** 2025-12-26T07:31:16Z + +**Changes:** +53 -27 (6 files) + + +## Description + +(cherry picked from #4991 commit 1192376a7b0856375b3dbea6c54ed3420e593b7d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4993: Support enumerable TopK + +**URL:** https://github.com/opensearch-project/sql/pull/4993 + +**Author:** @LantaoJin + +**Created:** 2025-12-23T12:14:09Z + +**State:** MERGED + +**Merged:** 2025-12-29T06:43:07Z + +**Changes:** +334 -217 (39 files) + 
+**Labels:** `enhancement`, `backport 2.19-dev` + + +## Description + +### Description +Support enumerable TopK, check #4982 for issue details. + +- EnumerableTopKConverterRule + - Convert `LogicalSort` with `fetch` to `CalciteEnumerableTopK` +- EnumerableTopKMergeRule + - Merge `EnumerableLimit` and `EnumerableSort` to `CalciteEnumerableTopK` + + +The `CalciteEnumerableTopK` is derived from `EnumerableLimitSort` which has the corrected cost computation. + +### Related Issues +Resolves #4982 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @yuancu - DISMISSED + + +LGTM + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableTopKRule.java:None` + + +Do we need copy the license from Calcite? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +change `{"field":"state.keyword","missing_bucket":false,"order":"asc"}` to`{"field":"state.keyword","missing_bucket":false,"order":"desc"}` + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml:None` + + +ditto + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableTopKRule.java:None` + + +No, I think. it's not 100% copy + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java:43` + + +Can `ENUMERABLE_TOP_K_RULE` be eliminated with `ENUMERABLE_TOP_K_MERGE_RULE`? 
+ +It seems the former converts limit+sort from logical to enumerable top-k, while the latter converts limit+sort from enumerable ones to enumerable top-k. But the former will be converted to limit+sort enumerable plan if without the converter rule. The merge rule seem to cover this case + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java:43` + + +No before #4992 , let me try it now, will remove it if no explain case fails + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java:43` + + +Thanks 4992, removing EnumerableTopKConverterRule works now. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:440` + + +[question] Curious when and how could it happens with this change? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +Shall we prevent push down `CalciteEnumerableTopK` in `SortIndexScanRule`? I think we should only push down an `EnumerableSort` for a physical level plan. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableTopK.java:20` + + +Missing Override `copy` method. If not, this class will downgrade to its parent later when do copy. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java:440` + + +The issue seems only happens without pruning. Now I cannot reproduce it. But for multisearch test `testExplainMultisearchTimestampInterleaving`, we can remove the second duplicated LIMIT pushdown: `LIMIT->5, LIMIT->5`. We can keep this logic. 
+ + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableTopK.java:20` + + +added + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableTopK.java:20` + + +> Missing Override `copy` method. If not, this class will downgrade to its parent later when do copy. + +@coderabbitai This is a good finding. Can you learn something from it? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:None` + + +fixed + + +## General Comments + + +### @LantaoJin + + +Let's suspend on merging until https://github.com/opensearch-project/sql/pull/4992 merged + + +--- + +# PR #4992: Prune old in operator push down rules + +**URL:** https://github.com/opensearch-project/sql/pull/4992 + +**Author:** @qianheng-aws + +**Created:** 2025-12-23T08:45:47Z + +**State:** MERGED + +**Merged:** 2025-12-26T07:30:22Z + +**Changes:** +451 -384 (110 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +This PR primally supports pruning the old operator if we get a better plan than before. Therefore, it will improve the efficiency of planning process by avoid exploring meaningless equivalent plans. + +The performance gain is shown below: +1. Average query cost on big5 and clickbench: + +  | previous | pruneOld +-- | -- | -- +big5 | 17ms | 14ms +clickbench | 26 ms | 19ms + +2. Average optimization cost on big5 and clickbench (By enable calcite's debug mode and adding timingTracer, it will induce more time cost than before) + + +  | previous | pruneOld +-- | -- | -- +big5 | 36.3 ms | 25.9 ms +clickbench | 265 ms | 110.9 ms + +3. 
Average number of applied rules on big5 and clickbench: + + +  | previous | pruneOld +-- | -- | -- +big5 | 114 | 50 +clickbench | 474 | 215 + +Some positive cases on plan with this PR: +- testDedupRename +- testCasePushdownAsRangeQueryExplain +- testExplainOnAggregationWithFunction +- testDedupExpr + +### Implementation Details + +As described in https://github.com/opensearch-project/sql/issues/4931#issuecomment-3645518565, there is also many issues and bug spotted after pruning old. So there is additional change to fix them and make it compatible: + +- As there is Subset reuse in Calcite, pruning a Subset which is the only child of other Subset will cause preparing failure. So we should prune the old from top to down and stop if the current node cannot be pruned. One node cannot be pruned if it's physical node(see the point5 in the above comment) or it has multiple parents(except the root of the call, as we are generating a new root to replace it). +- Make `PPLAggregateConvertRule`, `PPLAggGroupMergeRule`, 'RareTopPushdownRule', 'DedupPushDownRule' implements `SubstitutionRule` so they will get higher priority on rule match and then we can get optimized aggregates in RelSubset before pruning. +- Support removing `project`, `sort` and agg derived `filter` when doing aggregate push down. +- Slightly refactor `AggregateIndexScanRule` so it can support pushing down on more cases +- Refactor and simplify `DedupPushDownRule` so it can get compatible with the current pruning mechanism +- Continue pushing down limit if it can reduce the estimated row count. +- Fix several bugs in `AggregateAnalyzer` when the project is null. See UT in `AggregateAnalyzerTest`, its expected results is wrong before. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4931 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +I'd like to verify the checksum of clickbench result before being merged + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:746` + + +How does the condition `volcanoPlanner.getSubsetNonNull(rel).getParentRels().size() == 1` guarantee that the rel node's parent is pruned? + +Is it because that if the index is greater than 0, then index 0 must have been traversed? + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:36` + + +Why do some rules implement `SubstitutionRule`, while some don't? + + + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:47` + + +should be wrapped in `InterruptibleRelRule`? +``` +public class PPLAggGroupMergeRule extends InterruptibleRelRule implements SubstitutionRule +``` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:107` + + +how about move `tryPruneRelNodes(call)` into `InterruptibleRelRule`? 
+``` + onMatchImpl(call); ++ if (this instanceof SubstitutionRule) { ++ tryPruneRelNodes(call); ++ } +``` + + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml:11` + + +Critical: the original query is `count() by UserID, SearchPhrase`, the order matter for results + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:208` + + +I'm curious why can this rule be safely eliminated + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml:11` + + +ditto + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml:10` + + +Critical: Seems a bad case? the group key is an expression which should be pushed to script agg pushdown + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:36` + + +Good question! We need to update dev-doc to explain how to choose proper base rule. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:746` + + +Yes, if the current RelNode has only 1 parent, it must be the RelNode in `call.rels[current_offset - 1]` and it has been pruned in the previous step of this stream + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java:746` + + +The best way is looking up its all parents in `VolcanoPlanner.prunedNodes` while its unaccessible + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:208` + + +I refactor the dedup push down rule by unifying 2 rules into 1. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:36` + + +Basically, we can only add `SubstitutionRule` if we can ensure it's better than the previous ones. 
As said in Calcite's comment for `SubstitutionRule` + +> A rule that implements this interface indicates that the new RelNode is typically better than the old one. + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:107` + + +As discussed offline, `prune old` shouldn't forcefully be bound to `SubstitutionRule`. Keep flexibility currently. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml:10` + + +This is a good case of applying `PPLAggGroupMergeRule`. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml:11` + + +In SQL, `group by a, b` is equivalent to `group by b, a`, many logic follows this principle in Calcite. And this is case is affected by `AggregateProjectMergeRule` in Calcite. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:186` + + +add `private` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:190` + + +ditto + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java:47` + + +InterruptibleRelRule is in package `opensearch` and has dependency on `OpenSearchTimeoutException` while this rule is package `core`. + +Therefore, we cannot make this extends `InterruptibleRelRule` unless move that from package `opensearch` to `core` and add library `opensearch` in core gradle. + +On the other hand, if there is interrupt triggered in planning process, it should be detected in our push down rules in package `opensearch`. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:186` + + +We should give the methods in `AggregateBuilderHelper` package level accessibility. 
I see all methods in this class are using default symbol + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:12` + + +It's odd why this IT passed with `{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}` + +It should be `desc` + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml:12` + + +ASC is the default order. SORT is removed as it’s before DEDUP + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4991: Dedup pushdown (TopHits Agg) should work with Object fields + +**URL:** https://github.com/opensearch-project/sql/pull/4991 + +**Author:** @LantaoJin + +**Created:** 2025-12-23T08:03:13Z + +**State:** MERGED + +**Merged:** 2025-12-24T07:26:59Z + +**Changes:** +53 -27 (6 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +#4844 converted `dedup` to TopHits Agg. But failed to parse dedup column if the column is a child of Object field. +#4360 restored the internal primitive value in a Map for Aggregates (first, last, min, max) which stored these Map objects in their accumulators.(first, last, min, max) stored these Map objects in their accumulators. But this fixing was not necessary since #4844 fixed them in other way. + +In this PR: +1. fix the bug of get the dedup column names +2. revert #4360 + +### Related Issues +Resolves #4990 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +### @yuancu - APPROVED + + +LGTM + + +## Review Comments + + +### @aalva500-prog on `integ-test/src/test/resources/expectedOutput/calcite/big5/dedup_metrics_size_field.yaml:1` + + +Hi @LantaoJin, thank you for the changes. The `dedup` command now works, but looks like the query execution plan changed. Looks like it now includes a PROJECT pushdown optimization: + +``` +curl -X POST "localhost:9200/_plugins/_ppl/_explain" \ +-H "Content-Type: application/json" \ +-d '{ +"query": "source = big5 | dedup metrics.size | sort - @timestamp" +}' +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$7], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], data_stream=[$18], host=[$22], metrics=[$24], aws=[$27], event=[$32])\n LogicalSort(sort0=[$17], dir0=[DESC-nulls-last])\n LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], data_stream=[$18], data_stream.dataset=[$19], data_stream.namespace=[$20], data_stream.type=[$21], host=[$22], host.name=[$23], metrics=[$24], metrics.size=[$25], metrics.tmin=[$26], aws=[$27], aws.cloudwatch=[$28], aws.cloudwatch.ingestion_time=[$29], aws.cloudwatch.log_group=[$30], aws.cloudwatch.log_stream=[$31], event=[$32], event.dataset=[$33], event.id=[$34], event.ingested=[$35], _id=[$36], _index=[$37], _score=[$38], _maxscore=[$39], _sort=[$40], _routing=[$41])\n LogicalFilter(condition=[<=($42, 1)])\n LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], 
cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], data_stream=[$18], data_stream.dataset=[$19], data_stream.namespace=[$20], data_stream.type=[$21], host=[$22], host.name=[$23], metrics=[$24], metrics.size=[$25], metrics.tmin=[$26], aws=[$27], aws.cloudwatch=[$28], aws.cloudwatch.ingestion_time=[$29], aws.cloudwatch.log_group=[$30], aws.cloudwatch.log_stream=[$31], event=[$32], event.dataset=[$33], event.id=[$34], event.ingested=[$35], _id=[$36], _index=[$37], _score=[$38], _maxscore=[$39], _sort=[$40], _routing=[$41], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $25)])\n LogicalFilter(condition=[IS NOT NULL($25)])\n CalciteLogicalIndexScan(table=[[OpenSearch, big5]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$7], dir0=[DESC-nulls-last])\n CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, data_stream, host, metrics, metrics.size, aws, event], AGGREGATION->rel#2183:LogicalAggregate.NONE.[](input=LogicalProject#2181,group={0},agg#0=LITERAL_AGG(1))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"data_stream\",\"host\",\"metrics\",\"metrics.size\",\"aws\",\"event\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":10000,\"sources\":[{\"metrics.size\":{\"terms\":{\"field\":\"metrics.size\",\"missing_bucket\":false,\"order\":\"asc\"}}}]},\"aggregations\":{\"$f1\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"metrics.size\",\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"data_stream\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\":[]},\"script_fields\":{}}}}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)])\n" + } +``` + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/big5/dedup_metrics_size_field.yaml:1` + + +> Looks like it now includes a PROJECT pushdown optimization + +Did you run on the latest code? I didn't see the project pushdown action in the explain output. + + +## General Comments + + +### @LantaoJin + + +cc @aaarone90 +cc @ahkcs can you help to confirm the current fixing could address the issue of https://github.com/opensearch-project/sql/issues/4359 as long as the `CalcitePPLAggregationIT` passed? + + +--- + +# PR #4985: [AUTO] Increment version to 2.19.5-SNAPSHOT + +**URL:** https://github.com/opensearch-project/sql/pull/4985 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-22T21:36:43Z + +**State:** MERGED + +**Merged:** 2026-01-07T16:34:53Z + +**Changes:** +1 -1 (1 files) + +**Labels:** `v2.19.5` + + +## Description + +- Incremented version to **2.19.5-SNAPSHOT**. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @penghuo + + +Version bump, Ignore IT + + +--- + +# PR #4983: Support profile options for PPL - Part I Implement phases level metrics. + +**URL:** https://github.com/opensearch-project/sql/pull/4983 + +**Author:** @penghuo + +**Created:** 2025-12-22T19:45:12Z + +**State:** MERGED + +**Merged:** 2026-01-09T02:31:35Z + +**Changes:** +655 -11 (23 files) + +**Labels:** `enhancement`, `PPL`, `backport-failed`, `backport 2.19-dev` + +**Assignees:** @penghuo + + +## Description + +### Description +- Introduce query profiling framework with profile contexts, metrics, and thread-local lifecycle helpers. +- Propagate PPL `profile` flag through transport and request parsing, enforce supported formats/paths, and document profiling usage. 
+- Capture per-phase metrics across planning, optimization, execution, and response formatting; add profiling teardown and update related tests/expectations. +- Doc, https://github.com/penghuo/os-sql/blob/ee0d477cd56cdeddec6da15a334b82f606245ac5/docs/user/ppl/interfaces/endpoint.md#profile + +### Related Issues +https://github.com/opensearch-project/sql/issues/4294 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @qianheng-aws - COMMENTED + + +@penghuo Is this profile metric safe for parallelism updating? For `OPENSEARCH_TIME`, it may be updated by multiple thread if there is join or union. + + +### @LantaoJin - CHANGES_REQUESTED + + +above + + +## Review Comments + + +### @penghuo on `integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.profile.yml:None` + + +ignore it. + + +### @dai-chen on `docs/user/ppl/interfaces/endpoint.md:None` + + +Any future plan to include DSL profile output for each stage? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:43` + + +Is atomic operation required here? + + +### @dai-chen on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:205` + + +Just found this doesn't call listener properly which may cause profile context leak? + + +### @dai-chen on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:150` + + +This is to avoid NPE somewhere? 
+ + +### @penghuo on `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java:205` + + +Cleanup in [TransportPPLQueryAction](https://github.com/opensearch-project/sql/pull/4983/changes#diff-fd5cf7d596c746360ff54c37c1bd21bee320ee21ace49a03f1a5617b68150685R209) + + +### @penghuo on `docs/user/ppl/interfaces/endpoint.md:None` + + +No plan yet, Let me as experimental for profile option. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/executor/QueryService.java:150` + + +yes, explain endpoint does not support profile. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:43` + + +not necessary. Fix in https://github.com/opensearch-project/sql/pull/4983/changes/820d75783fcc9b1ab14f9366bd42dbc9b6929d9e + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java:None` + + +Should use `add` here since we support pagination for aggregate and this code will be called multiplet-times? + + +### @qianheng-aws on `core/src/main/java/org/opensearch/sql/monitor/profile/MetricName.java:9` + + +Do we need PARSE_TIME, though it should be tiny? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:None` + + +Should be AtomicLong? We have multi-thread usage in opensearch_time + + +### @LantaoJin on `docs/user/ppl/interfaces/endpoint.md:None` + + +I'd like make all the metrics lower case which to align with OpenSearch DSL profile + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/monitor/profile/QueryProfiling.java:19` + + +We search with multiple threads on JOIN/Subsearch queries. We the total time should sum them. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/MetricName.java:9` + + +I ignore it for now, should be minial. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/DefaultMetricImpl.java:None` + + +Add back. 
+ + +### @penghuo on `docs/user/ppl/interfaces/endpoint.md:None` + + +done. + + +### @penghuo on `core/src/main/java/org/opensearch/sql/monitor/profile/QueryProfiling.java:19` + + +see comments. https://github.com/opensearch-project/sql/pull/4983#issuecomment-3726499582 + + +## General Comments + + +### @penghuo + + +> @penghuo Is this profile metric safe for parallelism updating? For `OPENSEARCH_TIME`, it may be updated by multiple thread if there is join or union. +> We search with multiple threads on JOIN/Subsearch queries. We the total time should sum them. + +Agree. I separated this into 2 PRs. +The final output includes Phases (current PR) and a Plan section with operator-level metrics in PostgreSQL style (2nd PR). +``` +"profile": { + "summary": { + "total_time_ms": 123.4 + }, + "phases": { + "analyze": { "time_ms": 4.8 }, + "optimize": { "time_ms": 8.3 }, + "execute": { "time_ms": 95.0 }, + "format": { "time_ms": 14.1 } + }, + "plan": { + "node": "Result", + "time_ms": 14.0, + "rows": 1000, + "children": [ + { + "node": "HashJoin", + "time_ms": 60.0, + "rows": 1000, + "children": [ + { "node": "Scan(index_a)", "time_ms": 30.0}, + { "node": "Scan(index_b)", "time_ms": 28.0} + ] + } + ] + } +} +``` + + + +--- + +# PR #4981: Remove GetAlias Call + +**URL:** https://github.com/opensearch-project/sql/pull/4981 + +**Author:** @aparajita31pandey + +**Created:** 2025-12-22T04:37:12Z + +**State:** MERGED + +**Merged:** 2026-01-09T17:44:55Z + +**Changes:** +43 -9 (2 files) + +**Labels:** `backport 2.19-dev`, `bugFix` + + +## Description + +### Description +This diff removes a redundant alias-resolution call via the `GET /_alias/` API that requires the caller to have extra indices:admin/aliases/get privileges, which can cause permission issues when executing read queries. + +It instead leverages existing `GetFieldMapping` call that works for both index and alias. 
+ +Tested Functionality +#### With Index Name +``` +curl -X POST "localhost:9200/_plugins/_sql" -H 'Content-Type: application/json' \ + -d '{ + "query": "SELECT * FROM my-index", + "fetch_size": 1, + "filter": { + "term": { + "another_field": "hello" + } + } + }' +Output - +{ + "schema": [ + { + "name": "new_field", + "type": "text" + }, + { + "name": "another_field", + "type": "keyword" + } + ], + "total": 1, + "datarows": [[ + "Some text", + "hello" + ]], + "size": 1, + "status": 200 +} +``` + +#### With Alias Name - Same Output +``` +curl -X POST "localhost:9200/_plugins/_sql" -H 'Content-Type: application/json' \ + -d '{ + "query": "SELECT * FROM my-alias", + "fetch_size": 1, + "filter": { + "term": { + "another_field": "hello" + } + } + }' +Output - +{ + "schema": [ + { + "name": "new_field", + "type": "text" + }, + { + "name": "another_field", + "type": "keyword" + } + ], + "total": 1, + "datarows": [[ + "Some text", + "hello" + ]], + "size": 1, + "status": 200 +} +``` +### Related Issues +Resolves +#2960, https://github.com/opensearch-project/security/issues/5871 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - DISMISSED + + +Ideally this should have an integ test, but nothing wrong with the impl + + +### @aalva500-prog - APPROVED + + +LGTM, but agree with @Swiddis that some test would be ideal. + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +## Review Comments + + +### @dai-chen on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java:None` + + +Could you double check if multiple entires can be returned in certain case? 
Just want to make sure this check is not too strict and fail in valid case. + +``` +# Create test-index-1, 2, 3 +curl -X PUT "localhost:9200/test-index-1" -H 'Content-Type: application/json' -d' +{ + "mappings": { + "properties": { + "name": { "type": "text" }, + "age": { "type": "integer" }, + "timestamp": { "type": "date" } + } + } +}' + +... + +curl -X POST "localhost:9200/_aliases" -H 'Content-Type: application/json' -d' +{ + "actions": [ + { "add": { "index": "test-index-*", "alias": "wildcard-alias" } } + ] +}' + +curl -X GET "localhost:9200/wildcard-alias/_mapping?pretty" +{ + "test-index-1" : { + ... + }, + "test-index-2" : { + ... + }, + "test-index-3" : { + ... +} +``` + + +### @aalva500-prog on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java:None` + + +Hi @aparajita31pandey, could you take a look at this? thanks! + + +### @aparajita31pandey on `legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java:None` + + +@dai-chen Thankyou for pointing this out. I have updated the code and have added similar `integ-test` around it. Please have a look. + + +## General Comments + + +### @aparajita31pandey + + +@LantaoJin Can I please get a review ? + + +### @Swiddis + + +Re: integ tests, I have https://github.com/opensearch-project/sql/pull/5008 which lays a lot more groundwork for adding more permissions-related tests to our codebase. + +Since this PR was opened before that existed, I won't block on it -- after both PRs merge I'll write a small task for myself to add a test for this. + + +### @aparajita31pandey + + +@Swiddis @aalva500-prog I have added a small integ test for this change. Can I get a re-review ? 
+ + +--- + +# PR #4979: Support nested aggregation when calcite enabled + +**URL:** https://github.com/opensearch-project/sql/pull/4979 + +**Author:** @LantaoJin + +**Created:** 2025-12-19T09:59:29Z + +**State:** MERGED + +**Merged:** 2026-01-05T02:46:24Z + +**Changes:** +1181 -215 (43 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Refactor implementation for PPL: https://github.com/opensearch-project/sql/pull/3696 (closed) +Deprecated implementation for SQL: https://github.com/opensearch-project/sql/pull/2814 (closed) + +Support [nested aggregation](https://docs.opensearch.org/docs/latest/aggregations/bucket/nested/) in PPL only when calcite enabled. + +With this PR, the following PPL query is able to execute a nested aggregation query. +```SQL +source=logs | head 10000 | stats min(pages.load_time) +``` +And it equals the DSL +``` +GET logs/_search +{ + "aggs": { + "pages": { + "nested": { + "path": "pages" + }, + "aggs": { + "min_load_time": { "min": { "field": "pages.load_time" } } + } + } + } +} +``` + +The following queries (group-by nested path) are supported with this PR as well: +``` +source=test | top pages.load_time +source=test | stats count() by pages.load_time +source=test | dedup pages.load_time +``` + +**Limitation:** +- PPL only +- Calcite should be enabled +- Throw **UnsupportedOperationException** if pushdown cannot be applied. 
+- The following queries (group-by nested root path) are supported with this PR without pushdown enhancement: +``` +source=test | top pages +source=test | stats count() by pages +source=test | dedup pages +``` + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4949, https://github.com/opensearch-project/sql/issues/4564, https://github.com/opensearch-project/sql/issues/2813 and https://github.com/opensearch-project/sql/issues/2529, and https://github.com/opensearch-project/sql/issues/2739 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:179` + + +After upgraded to 1.41. this method was not called any more, change it to `createPrepare()` + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java:301` + + +This hook was called twice for non-full-scannable plan + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintStrategyTable.java:None` + + +rename `stats_args` to `agg_args` since aggregation was not only happens in `stats` command. + + +### @LantaoJin on `docs/user/ppl/interfaces/endpoint.md:114` + + +avoid whole-plan-pushdown + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java:39` + + +For some exception without cause (root exception), the actual root cause may be attached in its suppressed cause. 
+ + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java:28` + + +Prefer to display the root cause instead of wrapping exception. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:241` + + +To avoid ambiguous, we call the multiple sub-aggregations `structured` aggregations instead of `nested` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:367` + + +Here the value could be `min(a.b), count(c)`, rename `aggFieldNames` to `aggNames`. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4978: [Backport 2.19-dev] Support pushdown dedup with expression (#4957) + +**URL:** https://github.com/opensearch-project/sql/pull/4978 + +**Author:** @LantaoJin + +**Created:** 2025-12-19T07:03:33Z + +**State:** MERGED + +**Merged:** 2025-12-22T02:17:24Z + +**Changes:** +645 -170 (39 files) + + +## Description + +(cherry picked from #4957 commit cbcdbd6fc918e4a356480300c208aa76f468fbf1) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4976: Add Frequently Used Big5 PPL Queries + +**URL:** https://github.com/opensearch-project/sql/pull/4976 + +**Author:** @aalva500-prog + +**Created:** 2025-12-18T22:30:01Z + +**State:** MERGED + +**Merged:** 2026-01-06T18:58:48Z + +**Changes:** +157 -2 (7 files) + +**Labels:** `testing`, `backport 2.19-dev` + + +## Description + +### Description +This PR continues the work done in PR #4816 to add frequent used queries to the big5 workload based on gap analysis between existing benchmarks and frequent used query patterns. + +`dedup` query is added here: #4991 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @LantaoJin - COMMENTED + + +can you resolve the conflicts + + +## Review Comments + + +### @anasalkouz on `integ-test/src/test/resources/big5/queries/rex_regex_transformation.ppl:25` + + +nit: maybe explain what this query does + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +Any reason to do correctness check for this one? I thought this IT is only for benchmark? + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:741` + + +This PR is only to add more test queries right? Why the behavior changed? + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java:741` + + +The behavior changed because the file `integ-test/src/test/resources/big5/data/big5.json` was modified and now has more data. + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +This PR is to continue the work done here by @noCharger: https://github.com/opensearch-project/sql/pull/4816. I don't have the whole context, unfortunately. However, I think it is not only for benchmark, as the same was done for the `dedup` command in this PR: https://github.com/opensearch-project/sql/pull/4991 + + +### @dai-chen on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +But other test methods (except dedup) only do timing without this assertion? My understanding is this IT shouldn't do correctness check. + + +### @aalva500-prog on `integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java:None` + + +Removed correctness check for rex command. 
+ + +## General Comments + + +### @aalva500-prog + + +@Swiddis the SQL CLI Integration tests are failing, any recommendations on how to fix this? + + +--- + +# PR #4974: Add unified query compiler API + +**URL:** https://github.com/opensearch-project/sql/pull/4974 + +**Author:** @dai-chen + +**Created:** 2025-12-18T17:59:34Z + +**State:** MERGED + +**Merged:** 2026-01-07T18:01:02Z + +**Changes:** +554 -42 (10 files) + +**Labels:** `enhancement`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces a `UnifiedQueryCompiler` as part of the Unified Execution Runtime, enabling direct evaluation of PPL queries via a reference implementation. It completes the third pillar of the Unified Query API (alongside unified query planner and transpiler) and allows external consumers to execute PPL end-to-end using a Calcite-based in-memory evaluator, as described in #4782. + +**Key Changes** + +- `UnifiedQueryCompiler`: Introduces a new API that compiles Calcite logical plans into executable JDBC statements. +- `UnifiedQueryContext` lifecycle management: Implements AutoCloseable to properly manage resource lifecycle. +- Integration tests: Adds end-to-end integration tests demonstrating the complete workflow from context creation, query planning, compilation, and execution. + +### Related Issues +Resolves https://github.com/opensearch-project/sql/issues/4894 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `api/README.md:121` + + +UnifiedQueryCompiler.compile() compiles a RelNode into an executable query plan by leveraging Calcite’s Enumerable physical operators? + + +### @dai-chen on `api/README.md:121` + + +Yes, it's the same as current PPL Calcite logic in core module. Thanks! + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4971: [Backport 2.19-dev] Add scalar min/max to BuiltinFunctionName (#4967) + +**URL:** https://github.com/opensearch-project/sql/pull/4971 + +**Author:** @LantaoJin + +**Created:** 2025-12-18T02:13:17Z + +**State:** MERGED + +**Merged:** 2025-12-18T06:52:23Z + +**Changes:** +14 -7 (4 files) + + +## Description + +(cherry picked from #4967 commit 7dfabcea94952ead0a27463d810097d74446acc4) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4970: [Backport 2.19-dev] Extract unified query context for shared config management + +**URL:** https://github.com/opensearch-project/sql/pull/4970 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-17T22:54:42Z + +**State:** MERGED + +**Merged:** 2025-12-18T02:14:00Z + +**Changes:** +343 -215 (7 files) + + +## Description + +Backport 297074c1ed595c9d2e6ec34b6cca8ad6a247d0ea from #4933. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4967: Add scalar min/max to BuiltinFunctionName + +**URL:** https://github.com/opensearch-project/sql/pull/4967 + +**Author:** @LantaoJin + +**Created:** 2025-12-17T05:59:15Z + +**State:** MERGED + +**Merged:** 2025-12-18T02:09:50Z + +**Changes:** +14 -7 (4 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Add scalar min/max to BuiltinFunctionName. + +Since parsing min/max aggregation function and scalar function (eval function) in AST parser are separated, we can use different names in `BuiltinFunctionName` with no changes in PPL interface/syntax. + +`eval a = max(b)` -> SCALAR_MAX +`stats max(b)/eventstats max(b)/streamstats max(b)` -> (AGG) MAX + +### Related Issues +Resolves #4774 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the fix! + + +## Review Comments + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:2908` + + +Can you please kindly add a javadoc on `BuiltinFunctionName.of` to instruct future developers to use `BuiltinFunctionName.ofAggregation` for aggregation functions? 
+ + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4965: [Backport 2.19-dev] Support `mvmap` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4965 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-16T22:21:56Z + +**State:** MERGED + +**Merged:** 2025-12-17T02:34:10Z + +**Changes:** +499 -11 (15 files) + + +## Description + +Backport 11727a488a77ae392ef0f9da4bbde601937ffc5f from #4856. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4964: [Backport 2.19-dev] Feature addtotals and addcoltotals + +**URL:** https://github.com/opensearch-project/sql/pull/4964 + +**Author:** @dai-chen + +**Created:** 2025-12-16T22:12:48Z + +**State:** MERGED + +**Merged:** 2025-12-17T00:26:46Z + +**Changes:** +2313 -1 (25 files) + +**Assignees:** @dai-chen + + +## Description + +Backport 15e2411aba434f39d9b0c0d57e0f9b2e1b6f4c87 from #4754 with integration test fix by enabling Calcite. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4963: [Backport 2.19-dev] issue #4514 tonumber function as part of roadmap #4287 + +**URL:** https://github.com/opensearch-project/sql/pull/4963 + +**Author:** @dai-chen + +**Created:** 2025-12-16T19:28:01Z + +**State:** MERGED + +**Merged:** 2025-12-17T00:26:13Z + +**Changes:** +688 -2 (11 files) + +**Assignees:** @dai-chen + + +## Description + +Backport 342a78be6b83fb36df839cc840bb90b687a4751e from #4605 with additional integration test fix by enabling Calcite. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4962: Update CodeRabbit instructions + +**URL:** https://github.com/opensearch-project/sql/pull/4962 + +**Author:** @ykmr1224 + +**Created:** 2025-12-15T21:27:08Z + +**State:** MERGED + +**Merged:** 2025-12-17T23:04:44Z + +**Changes:** +95 -4 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Update CodeRabbit instructions based on #4497, #4605, #4675 + +### Related Issues +- https://github.com/opensearch-project/sql/issues/4889 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the changes! + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4961: [Backport 2.19-dev] Support `mvzip` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4961 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-15T21:08:05Z + +**State:** MERGED + +**Merged:** 2025-12-16T20:39:56Z + +**Changes:** +457 -1 (10 files) + + +## Description + +Backport 52a691a7e63c03f7acf7b1df0f118a3952257f95 from #4805. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4958: Escape underscore character in documentation for LIKE + +**URL:** https://github.com/opensearch-project/sql/pull/4958 + +**Author:** @Blitheness + +**Created:** 2025-12-15T14:06:24Z + +**State:** MERGED + +**Merged:** 2025-12-15T21:21:44Z + +**Changes:** +3 -3 (1 files) + +**Labels:** `documentation`, `PPL` + + +## Description + +### Description +Properly escape underscore character in documentation for LIKE so it doesn't render as italicised text. + +### Related Issues +NA + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @penghuo - APPROVED + + +Thanks! 
+ + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4957: Support pushdown dedup with expression + +**URL:** https://github.com/opensearch-project/sql/pull/4957 + +**Author:** @LantaoJin + +**Created:** 2025-12-15T10:18:02Z + +**State:** MERGED + +**Merged:** 2025-12-19T06:41:38Z + +**Changes:** +655 -170 (39 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Support pushdown `dedup` with expression: +- the dedup columns contain expressions + - `| eval new_gender = lower(gender), new_name = lower(name) | dedup 2 new_gender, new_name` +- the other columns contain expressions + - `| eval new_gender = lower(gender), new_name = lower(name) | dedup 2 gender, name` + +This PR also implicitly supports pushdown `join with max option` with expressions +- the join keys contain expressions + - `source = t1 | eval new_gender = lower(gender) | join new_gender [source = t2 | eval new_gender = lower(gender) ]` +- the other columns contain expressions + - `source = t1 | eval new_gender = lower(gender) | join gender [source = t2 | eval new_gender = lower(gender) ]` + +### Related Issues +Resolves #4789 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +nit: use `var` or `Map` + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:91` + + +Seems the comment also needs to be updated, as it no longer returns source but fields + + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +Can you explain the intuition / reason for reordering the columns? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +That's why I suggested you to use aggregateWithTrimming() instead of calling the relBuilder.aggregate() directly. In SQL, the Project added for Aggregate always keeps the group columns in the front of others, it means the group keys set always contain {0}, or it could trigger a Calcite bug. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:91` + + +It still returns source, the field will be added when scripts exist. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:94` + + +Seems this can be simplified to +``` + dedupColumnIndices.stream() + .map(projectWithWindow.getInput().getRowType().getFieldNames()::get) + .toList() +``` + + + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:96` + + +Will this ever happen since dedupColumnNames is derived from dedupColumnIndices? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +I don't get it as well. 
Since the `aggregate` is going to be pushed into scan and the final generated RelNode is a single `Scan` operator, will it really trigger the bug in Calcite? + +Do we do similar thing in the previous PR for supporting dedup push down? + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +There is already `import org.apache.commons.lang3.tuple.Pair` imported. Could it be switched to that or at least unified? + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java:None` + + +sure. will convert to common.lang3's Pair + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java:None` + + +The definition is in L93 `.>map(`. But yes, I can add `Map` here and remove L93. The reason use L93 instead of `Map` is we used unnamed variable, but now it has to define a name. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:94` + + +No. above code doesn't cover the rename case: +``` +source=test | rename status as http_status | dedup http_status | fields http_status | sort http_status +``` + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:96` + + +I am 100% sure they have same size. Better to keep this check. + + +### @LantaoJin on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +> I don't get it as well. Since the `aggregate` is going to be pushed into scan and the final generated RelNode is a single `Scan` operator, will it really trigger the bug in Calcite? +> +> Do we do similar thing in the previous PR for supporting dedup push down? + +Yes. I see bugs when the group key columns are not in the front of child Project in Calcite in developing support Aggregate with Calcite. Not sure is it fixed or not. 
Check the comment here https://github.com/opensearch-project/sql/blob/7dfabcea94952ead0a27463d810097d74446acc4/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java#L1049 + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java:118` + + +That happens in CalciteRelNodeVisitor, but is similar change needed in the push down process? + + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4956: [Backport 2.19-dev] Pushdown join with `max=n` option to TopHits aggregation (#4929) + +**URL:** https://github.com/opensearch-project/sql/pull/4956 + +**Author:** @LantaoJin + +**Created:** 2025-12-12T07:24:55Z + +**State:** MERGED + +**Merged:** 2025-12-12T08:15:42Z + +**Changes:** +168 -130 (27 files) + + +## Description + +(cherry picked from #4929 commit 4bf5c9c776e7f8cb11714d68fbc2c9163475ef23) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4955: remove shadow jar + +**URL:** https://github.com/opensearch-project/sql/pull/4955 + +**Author:** @xinyual + +**Created:** 2025-12-12T06:18:09Z + +**State:** MERGED + +**Merged:** 2025-12-15T02:17:54Z + +**Changes:** +2 -69 (2 files) + +**Labels:** `dependencies`, `maintenance` + + +## Description + +### Description +We try to remove shadow jar since access control now is already removed in core. +Revert the change in PR https://github.com/opensearch-project/sql/pull/3447 + +Also fix an IT due to the import name changed from +shaded.com.google -> com.google + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +It confused me why a plan changed by your PR + + +### @xinyual on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +After decode, the script previously using +`import shaded.com.google...` +The `shaded` here is caused by shade jar. +But now we remove it, so it is +`import com.google...` +So the script changed. + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +can you run explain command with `extended` and attach the result here + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +oh, it's a v2 plan. not sure how to decode the base64 string. 
+ + +### @xinyual on `integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml:12` + + +Using online decode and here are the result: +using shadow jar + +�^Esr�6org.opensearch.sql.expression.parse.PatternsExpressionΪG^F^B�^BZ�^PuseCustomPatternL�^Gpatternt�^YLjava/util/regex/Pattern;xr�3org.opensearch.sql.expression.parse.ParseExpression +} ^B�^DL� +identifiert�*Lorg/opensearch/sql/expression/Expression;L� +identifierStrt�^RLjava/lang/String;L�^Gpatternq�~�^CL� sourceFieldq�~�^Cxr�0org.opensearch.sql.expression.FunctionExpression*0uj{^B�^BL� argumentst�^PLjava/util/List;L� functionNamet�5Lorg/opensearch/sql/expression/function/FunctionName;xpsr�=`shaded.com.google.common.collect.ImmutableList$SerializedForm`��������^B�^A[�elementst�^S[Ljava/lang/Object;xpur�^S[Ljava.lang.Object;X^Ps)l^B��xp���^Csr�1org.opensearch.sql.expression.ReferenceExpressionD\^R^G^B�^DL�^Dattrq�~�^DL�^Epathsq�~�^FL�^GrawPathq�~�^DL�^Dtypet�'Lorg/opensearch/sql/data/type/ExprType;xpt�^Eemailsr�^Zjava.util.Arrays$ArrayList٤<͈^F^B�^A[�^Aaq�~� +xpur�^S[Ljava.lang.String;V^]{G^B��xp���^Aq�~�^Qq�~�^Qsr�:org.opensearch.sql.opensearch.data.type.OpenSearchTextType^D1D^B�^AL�^Ffieldst�^OLjava/util/Map;xr�:org.opensearch.sql.opensearch.data.type.OpenSearchDataTypec^B^E5^B�^CL� exprCoreTypet�+Lorg/opensearch/sql/data/type/ExprCoreType;L� mappingTypet�HLorg/opensearch/sql/opensearch/data/type/OpenSearchDataType$MappingType;L� +propertiesq�~�^Wxp~r�)org.opensearch.sql.data.type.ExprCoreType��������^R��xr�^Njava.lang.Enum��������^R��xpt�^GUNKNOWN~r�Forg.opensearch.sql.opensearch.data.type.OpenSearchDataType$MappingType��������^R��xq�~�^]t�^DTextsr�<*`shaded.com.google.common.collect.ImmutableMap$SerializedForm`*��������^B�^BL�^Dkeyst�^RLjava/lang/Object;L�^Fvaluesq�~�$xpuq�~� ����uq�~� ����sr�^Qjava.util.CollSerW:^[^Q^C�^AI�^Ctagxp���^Cw^D���^Bt�^Gkeywordsq�~�^X~q�~�^\t�^FSTRING~q�~� t�^GKeywordq�~�%xsr�/org.opensearch.sql.expression.LiteralExpressionEB-ǂ$^B�^AL� 
exprValuet�)Lorg/opensearch/sql/data/model/ExprValue;xpsr�-org.opensearch.sql.data.model.ExprStringValue�A2%s^N^S^B�^AL�^Evalueq�~�^Dxr�/org.opensearch.sql.data.model.AbstractExprValuekv^F^TD^B��xpt��sq�~�0sq�~�3t�^Npatterns_fieldsr�3org.opensearch.sql.expression.function.FunctionName 8Mg^B�^AL� functionNameq�~�^Dxpt�patternsq�~�7q�~�9q�~�2q�~�^P�p + + +And without shadow jar + +�^Esr�6org.opensearch.sql.expression.parse.PatternsExpressionΪG^F^B�^BZ�^PuseCustomPatternL�^Gpatternt�^YLjava/util/regex/Pattern;xr�3org.opensearch.sql.expression.parse.ParseExpression} ^B�^DL� +identifiert�*Lorg/opensearch/sql/expression/Expression;L� +identifierStrt�^RLjava/lang/String;L�^Gpatternq�~�^CL� sourceFieldq�~�^Cxr�0org.opensearch.sql.expression.FunctionExpression*0uj{^B�^BL� argumentst�^PLjava/util/List;L� functionNamet�5Lorg/opensearch/sql/expression/function/FunctionName;xpsr�6*`com.google.common.collect.ImmutableList$SerializedForm`*��������^B�^A[�elementst�^S[Ljava/lang/Object;xpur�^S[Ljava.lang.Object;X^Ps)l^B��xp���^Csr�1org.opensearch.sql.expression.ReferenceExpressionD\^R^G^B�^DL�^Dattrq�~�^DL�^Epathsq�~�^FL�^GrawPathq�~�^DL�^Dtypet�'Lorg/opensearch/sql/data/type/ExprType;xpt�^Eemailsr�^Zjava.util.Arrays$ArrayList٤<͈^F^B�^A[�^Aaq�~� +xpur�^S[Ljava.lang.String;V^]{G^B��xp���^Aq�~�^Qq�~�^Qsr�:org.opensearch.sql.opensearch.data.type.OpenSearchTextType^D1D^B�^AL�^Ffieldst�^OLjava/util/Map;xr�:org.opensearch.sql.opensearch.data.type.OpenSearchDataTypec^B^E5^B�^CL� exprCoreTypet�+Lorg/opensearch/sql/data/type/ExprCoreType;L� mappingTypet�HLorg/opensearch/sql/opensearch/data/type/OpenSearchDataType$MappingType;L� +propertiesq�~�^Wxp~r�)org.opensearch.sql.data.type.ExprCoreType��������^R��xr�^Njava.lang.Enum��������^R��xpt�^GUNKNOWN~r�Forg.opensearch.sql.opensearch.data.type.OpenSearchDataType$MappingType��������^R��xq�~�^]t�^DTextsr�5*`com.google.common.collect.ImmutableMap$SerializedForm`*��������^B�^BL�^Dkeyst�^RLjava/lang/Object;L�^Fvaluesq�~�$xpuq�~� ����uq�~� 
����sr�^Qjava.util.CollSerW:^[^Q^C�^AI�^Ctagxp���^Cw^D���^Bt�^Gkeywordsq�~�^X~q�~�^\t�^FSTRING~q�~� t�^GKeywordq�~�%xsr�/org.opensearch.sql.expression.LiteralExpressionEB-ǂ$^B�^AL� exprValuet�)Lorg/opensearch/sql/data/model/ExprValue;xpsr�-org.opensearch.sql.data.model.ExprStringValue�A2%s^N^S^B�^AL�^Evalueq�~�^Dxr�/org.opensearch.sql.data.model.AbstractExprValuekv^F^TD^B��xpt��sq�~�0sq�~�3t�^Npatterns_fieldsr�3org.opensearch.sql.expression.function.FunctionName 8Mg^B�^AL� functionNameq�~�^Dxpt�patternsq�~�7q�~�9q�~�2q�~�^P�p + +You could see the com.google.common.collect.ImmutableList$SerializedForm path changed. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4954: [Backport 2.19-dev] [DOC] Show backticks in testing-doctest.md + +**URL:** https://github.com/opensearch-project/sql/pull/4954 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-12T03:56:22Z + +**State:** MERGED + +**Merged:** 2025-12-12T05:55:56Z + +**Changes:** +6 -6 (1 files) + + +## Description + +Backport c527afc0f0e7868068b69a05c521cf1ee7c70334 from #4941. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4953: [Backport 2.19-dev] [DOC] Callout the aggregation result may be approximate + +**URL:** https://github.com/opensearch-project/sql/pull/4953 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-12T03:55:44Z + +**State:** MERGED + +**Merged:** 2025-12-12T05:55:49Z + +**Changes:** +58 -1 (3 files) + + +## Description + +Backport 90ee47c6f909d38f5ba12cef3c2bda8c5f23cce5 from #4922. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4951: [Backport 2.19-dev] RexCall and RelDataType standardization for script push down (#4914) + +**URL:** https://github.com/opensearch-project/sql/pull/4951 + +**Author:** @qianheng-aws + +**Created:** 2025-12-12T03:05:00Z + +**State:** MERGED + +**Merged:** 2025-12-12T05:56:18Z + +**Changes:** +303 -79 (43 files) + + +## Description + +Backport https://github.com/opensearch-project/sql/commit/bcfcd002d5ec402f257a92f9689097d1c9bf8979 from https://github.com/opensearch-project/sql/pull/4914. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4950: [DOC] PPL docs website exporter script + +**URL:** https://github.com/opensearch-project/sql/pull/4950 + +**Author:** @kylehounslow + +**Created:** 2025-12-12T01:25:36Z + +**State:** MERGED + +**Merged:** 2025-12-15T21:30:17Z + +**Changes:** +273 -76 (7 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +Add an automated exporter script that transforms PPL markdown documentation tree (`docs/user/ppl/**/*.md`) into Jekyll-compatible format for the OpenSearch documentation website. + +#### Summary of changes +* Add PPL docs exporter script +* Minor nit fixes to existing markdown docs (discovered when rendering local build of docs website) + +#### Demo +Screen recording of rendered Jekyll site after exporting PPL docs. No manual changes applied. 
**See the files exported to documentation-website for below demo [here](https://github.com/kylehounslow/documentation-website/tree/381a7a0d733cd43bfc98d6cef748775a09ecc10e/_sql-and-ppl/ppl-reference)** + +https://github.com/user-attachments/assets/07fdf522-5b42-4b1f-a993-ab8dd7798f6c + + + + +#### Exporter script features +- **Jekyll front-matter generation**: Auto-injects layout, title, parent/grand_parent hierarchy, and navigation order +- **link resolution**: Handles all relative link patterns (../, ./, same-directory, subdirectory) +- **Jekyll anchor normalization**: Converts anchors to match Jekyll's format (removes dots/dashes) +- **Deep directory rollup**: Flattens 3+ level directories with automatic redirect_from for original paths. **This is a workaround for a limitation in `just-the-docs` theme on docs website**. +- **Content transformations**: + - Converts PPL code fences to SQL syntax highlighting + - Adds copy buttons to code blocks +- **Directory heading mappings**: Consistent navigation titles (e.g., cmd → "Commands") + +### Related Issues +* https://github.com/opensearch-project/sql/issues/4854 +* https://github.com/opensearch-project/sql/pull/4912 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @Swiddis - APPROVED + + +lgtm, could probably find maintainability nits if I tried but since this is a one-off script we probably don't need to worry about it + + +## Review Comments + + +### @kylehounslow on `scripts/docs_exporter/export_to_docs_website.py:146` + + +This is incorrect. Without global .md replacement, it would miss anchor links (headings). 
Example from [ppl/cmd/grok.md](https://github.com/opensearch-project/sql/blob/4bf5c9c776e7f8cb11714d68fbc2c9163475ef23/docs/user/ppl/cmd/grok.md?plain=1#L86): + +```` +## Limitations +The grok command has the same limitations as the parse command, see [parse limitations](./parse.md#Limitations) for details. +```` + + +### @Swiddis on `scripts/docs_exporter/export_to_docs_website.py:301` + + +non-portable paths, but if i understand right this only runs in CI servers or in a box where we control the platform + + +### @penghuo on `docs/user/ppl/index.md:14` + + +when should add ppl ignore? + + +### @kylehounslow on `docs/user/ppl/index.md:14` + + +The `ignore` keyword will [omit the code block from doctests](https://github.com/opensearch-project/sql/blob/ccdb0f77454fa2fcba33540b87e934ec89a25b67/doctest/markdown_parser.py#L139-L140). For this case, we want to exclude from doctests but still want `ppl` syntax highlighting applied. +Note: All `ppl` blocks are [converted to `sql` formatting](https://github.com/kylehounslow/sql/blob/3354c91512f0369b58bf80ffb72e084870308394/scripts/docs_exporter/export_to_docs_website.py#L151) on export to docs website (`sql` is the closest format supported for syntax highlighting). + + +### @kylehounslow on `scripts/docs_exporter/export_to_docs_website.py:301` + + +Uses `pathlib.Path` object with `/` operator ([src](https://github.com/python/cpython/blob/27a2e49d1849751008ea5807558129e11d35fb7a/Lib/pathlib/__init__.py#L176)). Should be portable to all OS + + +### @Swiddis on `scripts/docs_exporter/export_to_docs_website.py:301` + + +Ah, I didn't realize the slash operator carried to the trailing strings. I thought we'd end up with `{script_dir}\../...`. 
TIL + + +## General Comments + + +### @kylehounslow + + +@coderabbitai review + + +--- + +# PR #4946: [Backport 3.4] Update 3.4 release note + +**URL:** https://github.com/opensearch-project/sql/pull/4946 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-11T17:33:13Z + +**State:** MERGED + +**Merged:** 2025-12-11T17:55:49Z + +**Changes:** +8 -8 (1 files) + + +## Description + +Backport fc3a9355a84d33db2d30be72c7a2b000de7787f8 from #4939. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4945: [Backport main] Update 3.4 release note + +**URL:** https://github.com/opensearch-project/sql/pull/4945 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-11T17:33:01Z + +**State:** MERGED + +**Merged:** 2025-12-11T17:55:45Z + +**Changes:** +8 -8 (1 files) + + +## Description + +Backport fc3a9355a84d33db2d30be72c7a2b000de7787f8 from #4939. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4942: [Backport 2.19-dev] Replace duplicated aggregation logic with `aggregateWithTrimming()` + +**URL:** https://github.com/opensearch-project/sql/pull/4942 + +**Author:** @ishaoxy + +**Created:** 2025-12-11T07:56:50Z + +**State:** MERGED + +**Merged:** 2025-12-11T08:28:44Z + +**Changes:** +65 -82 (10 files) + + +## Description + +### Description +backport #4926 + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4941: [DOC] Show backticks in testing-doctest.md + +**URL:** https://github.com/opensearch-project/sql/pull/4941 + +**Author:** @LantaoJin + +**Created:** 2025-12-11T05:15:36Z + +**State:** MERGED + +**Merged:** 2025-12-11T08:32:03Z + +**Changes:** +6 -6 (1 files) + +**Labels:** `documentation`, `backport 2.19-dev` + + +## Description + +### Description +Show backticks in testing-doctest.md + +Before: +Screenshot 2025-12-11 at 1 14 09 PM + +After: +Screenshot 2025-12-11 at 1 14 56 PM + + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +ping @kylehounslow + + +--- + +# PR #4939: Update 3.4 release note + +**URL:** https://github.com/opensearch-project/sql/pull/4939 + +**Author:** @ahkcs + +**Created:** 2025-12-10T23:31:09Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:26:09Z + +**Changes:** +8 -8 (1 files) + +**Labels:** `backport main`, `backport 3.4` + + +## Description + +### Description +3.4 release note update + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4938: [Backport 2.19-dev] backport markdown doctest support + +**URL:** https://github.com/opensearch-project/sql/pull/4938 + +**Author:** @kylehounslow + +**Created:** 2025-12-10T23:12:37Z + +**State:** MERGED + +**Merged:** 2025-12-11T18:44:20Z + +**Changes:** +19030 -17651 (150 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Backport markdown doctest support from https://github.com/opensearch-project/sql/pull/4912 to `2.19-dev` branch + +### Related Issues +Resolves merge conflicts blocking bot from creating auto backport PRs. + +### Check List + - [n/a] New functionality has javadoc added. +- [n/a] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [n/a] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4937: [Backport 2.19-dev] Enhance doc and error message handling for `bins` on time-related fields + +**URL:** https://github.com/opensearch-project/sql/pull/4937 + +**Author:** @ahkcs + +**Created:** 2025-12-10T21:34:10Z + +**State:** MERGED + +**Merged:** 2025-12-15T16:28:26Z + +**Changes:** +522 -1 (5 files) + + +## Description + +Enhance doc and error message handling for `bins` on time-related field (#4713) + +(cherry picked from commit ef4c51e0e15e6d8e5385ea3605c536775396fc39) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4936: [Backport 2.19-dev] Time Unit Unification for bin/stats (#4450) + +**URL:** https://github.com/opensearch-project/sql/pull/4936 + +**Author:** @ahkcs + +**Created:** 2025-12-10T20:05:12Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:22:11Z + +**Changes:** +599 -60 (5 files) + +**Labels:** `maintenance` + + +## Description + +(cherry picked from commit 5bb274740685a57d1798e70ab43f6859e3d7ee81) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4935: [Backport 3.4] Update 3.4 release Note (#4923) + +**URL:** https://github.com/opensearch-project/sql/pull/4935 + +**Author:** @ahkcs + +**Created:** 2025-12-10T18:56:39Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:29:58Z + +**Changes:** +157 -2 (2 files) + 
+**Labels:** `skip-changelog` + + +## Description + +Update 3.4 doc +(cherry picked from commit c87f99f5554e3e7edba2855bc2d0f219f4506c0d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4934: [Backport main] Update 3.4 release Note (#4923) + +**URL:** https://github.com/opensearch-project/sql/pull/4934 + +**Author:** @ahkcs + +**Created:** 2025-12-10T18:51:20Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:29:31Z + +**Changes:** +155 -0 (1 files) + +**Labels:** `skip-changelog` + + +## Description + +Update 3.4 doc +(cherry picked from commit c87f99f5554e3e7edba2855bc2d0f219f4506c0d) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4933: Extract unified query context for shared config management + +**URL:** https://github.com/opensearch-project/sql/pull/4933 + +**Author:** @dai-chen + +**Created:** 2025-12-10T17:32:25Z + +**State:** MERGED + +**Merged:** 2025-12-17T22:54:26Z + +**Changes:** +343 -215 (7 files) + +**Labels:** `bug`, `maintenance`, `backport 2.19-dev` + +**Assignees:** @dai-chen + + +## Description + +### Description + +This PR introduces `UnifiedQueryContext`, a reusable abstraction shared across unified query components (parser, planner, compiler, etc.). It centralizes configuration by constructing and bundling `CalcitePlanContext` and `Settings` into a single object. As a result, all unified query components can now read required configuration explicitly, resolving the configuration propagation issue tracked https://github.com/opensearch-project/sql/issues/4910. + +### Related Issues + +Resolves https://github.com/opensearch-project/sql/issues/4910. 
+ +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `api/README.md:None` + + +High-level Q: What session do you mean? I mean, how to define a session start and close? I didn't see any session management code at a glance. + + +### @dai-chen on `api/README.md:None` + + +I was thinking about session in SQL/Spark. Since we've not clearly defined it, let me remove it to avoid confusion. Thanks! + + +### @dai-chen on `api/README.md:None` + + +Addressed in https://github.com/opensearch-project/sql/pull/4933/changes/f650fb5fc90150d973cd4ac0c6adbbf984792ef6. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4932: Add feedback reminder for CodeRabbit + +**URL:** https://github.com/opensearch-project/sql/pull/4932 + +**Author:** @ykmr1224 + +**Created:** 2025-12-10T17:24:44Z + +**State:** MERGED + +**Merged:** 2025-12-11T22:01:28Z + +**Changes:** +28 -0 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Add feedback reminder for CodeRabbit + - When CodeRabbit made review comment, it automatically adds reminder message to leave feedback. `👋 Leave emoji reaction (👍/👎) to track effectiveness of CodeRabbit.` + - Later we can take stats on how many comments got positive/negative feedback via Github API, and make improvement based on the result. + +### Related Issues +n/a + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Just wonder is this doable in Coderabbit config? Just feel this workflow is very specific for this small task. + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @dai-chen + + +@ykmr1224 Also wondering is this knowledge base feature available to us? Instead of manual analysis, can we give feedback to Coderabbit directly? https://docs.coderabbit.ai/integrations/knowledge-base + + +### @ykmr1224 + + +> Just wonder is this doable in Coderabbit config? Just feel this workflow is very specific for this small task. + +Coderabbit don't have configuration for this kind of customization so far. + +> @ykmr1224 Also wondering is this knowledge base feature available to us? Instead of manual analysis, can we give feedback to Coderabbit directly? https://docs.coderabbit.ai/integrations/knowledge-base + +It is enabled and doable, but that relies on each developer giving appropriate feedback. +We should track overall effectiveness and improve that continuously. + + +### @dai-chen + + +> > Just wonder is this doable in Coderabbit config? Just feel this workflow is very specific for this small task. +> +> Coderabbit don't have configuration for this kind of customization so far. +> +> > @ykmr1224 Also wondering is this knowledge base feature available to us? Instead of manual analysis, can we give feedback to Coderabbit directly? https://docs.coderabbit.ai/integrations/knowledge-base +> +> It is enabled and doable, but that relies on each developer giving appropriate feedback. We should track overall effectiveness and improve that continuously. + +I agree. Either the knowledge base can separate team and personal preference, or we give permission to a small group. 
We can discuss offline. I'm just thinking we can maintain a single source of coding guidance, team knowledge between local Dev agent and this Review agent. Thanks! cc: @penghuo + + +--- + +# PR #4930: [Backport 2.19-dev] Support composite aggregation paginating (#4884) + +**URL:** https://github.com/opensearch-project/sql/pull/4930 + +**Author:** @LantaoJin + +**Created:** 2025-12-10T10:01:39Z + +**State:** MERGED + +**Merged:** 2025-12-11T02:38:44Z + +**Changes:** +1121 -532 (190 files) + + +## Description + +(cherry picked from #4884 commit 9930665c372f433eea4aeb04b5a4cfcd51be3e9e) + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4929: Pushdown join with `max=n` option to TopHits aggregation + +**URL:** https://github.com/opensearch-project/sql/pull/4929 + +**Author:** @LantaoJin + +**Created:** 2025-12-10T09:09:26Z + +**State:** MERGED + +**Merged:** 2025-12-12T06:36:57Z + +**Changes:** +170 -132 (27 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `backport 2.19-dev` + + +## Description + +### Description +Pushdown join with `max=n` option to TopHits aggregation: +- The right side subsearch with `max=n` will be converted to TopHits aggregation. +- For inner join, the `SortMergeJoin` may be converted to `HashJoin` by reordering the sides of join +- For non-inner join, the right side will be fully pushed down to DSL, rather than executing `WindowFunction` in memory. + +### Related Issues +Resolves #4927 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +Remove it? + + +### @LantaoJin on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:None` + + +sure. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +The behavior of system limit has changed from `limitation of source` to `limitation of the results after top hits`. + +So if we cannot push down the window, it will scan all rows from the source. @LantaoJin + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +I corrected the behavior since the define of `plugins.ppl.join.subsearch_maxout` is + +> The size configures the maximum of rows from subsearch to join against. + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +Can we ensure the window will always be pushed down? Otherwise it will get regression than before? + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml:23` + + +No if (1) the join keys contain text (not keyword); (2) the join keys contain expression (I am working on support #4789 which could resolve it) + + +## General Comments + + +### @LantaoJin + + +> core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java (1)
    +> +> `1323-1374`: **Address the gap between documentation and SEMI/ANTI join implementation.** +> +> The documentation allows `max` option with `semi` and `anti` join types, but the code returns early for SEMI/ANTI joins (lines 1328-1332) before processing the `max` option and applying dedup/limit optimizations. This creates an inconsistency: users can specify `max` with SEMI/ANTI joins per the documented syntax, but it will be silently ignored. +> +> Either remove the early return to enable `max` support for SEMI/ANTI joins, or add a validation error when `max` is specified with SEMI/ANTI, and update documentation to clarify the limitation. + +@coderabbitai the `max` option takes no effect for `semi` and `anti` join types because `semi` and `anti` joins just use left side to filter the records in left side. So the join results in whatever max=1, max=2 or max=∞ are totally same. + + +--- + +# PR #4928: [Backport 2.19-dev] Support sort expression pushdown for SortMergeJoin(#4830) + +**URL:** https://github.com/opensearch-project/sql/pull/4928 + +**Author:** @songkant-aws + +**Created:** 2025-12-10T08:37:30Z + +**State:** MERGED + +**Merged:** 2025-12-11T07:35:34Z + +**Changes:** +437 -133 (17 files) + + +## Description + +### Description +Backport #4830 to 2.19-dev + +### Related Issues + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4926: Replace duplicated aggregation logic with `aggregateWithTrimming()` + +**URL:** https://github.com/opensearch-project/sql/pull/4926 + +**Author:** @ishaoxy + +**Created:** 2025-12-10T06:36:20Z + +**State:** MERGED + +**Merged:** 2025-12-11T07:31:01Z + +**Changes:** +65 -76 (10 files) + +**Labels:** `backport-manually`, `backport-failed`, `backport 2.19-dev`, `bugFix` + + +## Description + +### Description + Just use `aggregateWithTrimming()` to avoid duplicating existing functionality. + +### Related Issues +Resolves #4925 + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1833` + + +@yuancu @songkant-aws we'd better not call `context.relBuilder.aggregate` directly, instead, use `aggregateWithTrimming` to build aggregation in stack. I see some code in `visitChart`, `rankByColumnSplit` and `visitPatterns`. Can you create an issue to replace? + + +### @LantaoJin on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1833` + + +Or co-author with @ishaoxy to address them in this PR. + + +### @yuancu on `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java:1833` + + +I did so because I needed to build multiple aggregations for the chart command.
`aggregateWithTrimming` works on PPL's AST (composed of `UnresolvedPlan`) and creates RexNode AST, but I have already gone through this in the first aggregation. Therefore, I had to call `relBuilder.aggregate` the second time I created an aggregation to aggregate on RexNode. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4924: Remove all AccessController refs + +**URL:** https://github.com/opensearch-project/sql/pull/4924 + +**Author:** @Swiddis + +**Created:** 2025-12-09T20:27:42Z + +**State:** MERGED + +**Merged:** 2025-12-10T17:11:58Z + +**Changes:** +377 -581 (25 files) + +**Labels:** `maintenance` + + +## Description + +### Description +Removes all uses of AccessController, which is deprecated since version 3.0. Finishing what #4900 started. + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java:138` + + +@coderabbitai can you commit this? 
+ + +### @Swiddis on `direct-query/src/main/java/org/opensearch/sql/directquery/transport/model/ExecuteDirectQueryActionResponse.java:112` + + +Will skip this, it's pre-existing behavior and I'm not sure what the impact would be to change it + +Long-term: use a report wrapper #4919 + + +### @Swiddis on `opensearch/src/main/java/org/opensearch/sql/opensearch/security/SecurityAccess.java:None` + + +Will remove this class entirely since it's just another doPrivileged wrapper + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4923: Update 3.4 release Note + +**URL:** https://github.com/opensearch-project/sql/pull/4923 + +**Author:** @ahkcs + +**Created:** 2025-12-09T17:44:53Z + +**State:** MERGED + +**Merged:** 2025-12-09T21:16:26Z + +**Changes:** +92 -10 (2 files) + +**Labels:** `PPL`, `backport main`, `backport-failed`, `skip-changelog`, `backport 3.4` + + +## Description + +### Description +Update 3.4 release Note + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:9` + + +Keep the message consistent with other features. +Change it to support `replace` + + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:48` + + +Shall we add all new functions under the features section? 
+ + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:26` + + +Should be under features sections + + +### @anasalkouz on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:27` + + +All new eval functions to be added under the feature section + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:9` + + +Updated + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:48` + + +Moved eval functions to features section + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:26` + + +Updated + + +### @ahkcs on `release-notes/opensearch-sql.release-notes-3.4.0.0.md:27` + + +Moved eval functions to features section + + +## General Comments + + +### @ahkcs + + +Resolving the comments in a new PR, which will also be backported: +https://github.com/opensearch-project/sql/pull/4939 + + +--- + +# PR #4922: [DOC] Callout the aggregation result may be approximate + +**URL:** https://github.com/opensearch-project/sql/pull/4922 + +**Author:** @LantaoJin + +**Created:** 2025-12-09T09:43:15Z + +**State:** MERGED + +**Merged:** 2025-12-11T08:34:18Z + +**Changes:** +58 -1 (3 files) + +**Labels:** `documentation`, `backport 2.19-dev` + + +## Description + +### Description +[DOC] Callout the aggregation result may be approximate + +### Related Issues +Resolves #4915 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @kylehounslow on `docs/user/ppl/cmd/stats.md:None` + + +Thanks for keeping our docs fresh! + +Note: If no output is provided, doctests might fail. Need to use "ignore" to omit from doctest. E.g. 
+```` +```ppl ignore +source=hits +| stats bucket_nullable=false count() as c by URL +| sort - c +| head 10 +``` +```` + +Else provide the expected output in a codeblock following the `ppl` block. Example: +```` +```ppl +search source=accounts | where age > 25 | fields firstname, lastname +``` + +Expected output: + +```text ++-------------+------------+ +| firstname | lastname | +|-------------+------------| +| Amber | Duke | +| Hattie | Bond | ++-------------+------------+ +``` +```` + + +See https://github.com/opensearch-project/sql/blob/5f963a0a0ae29e20d84306d3423daf104cddeb42/docs/dev/testing-doctest.md#markdown-format-new---currently-for-docsuserppl-only + + +### @LantaoJin on `docs/user/ppl/cmd/stats.md:None` + + +Thanks! `ignore` added. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4920: [2.19-dev] Disable Calcite by default + +**URL:** https://github.com/opensearch-project/sql/pull/4920 + +**Author:** @LantaoJin + +**Created:** 2025-12-09T03:12:30Z + +**State:** MERGED + +**Merged:** 2025-12-09T06:50:34Z + +**Changes:** +93 -1 (8 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Disable Calcite by default in 2.19-dev + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4918: [Backport 2.19-dev] Support `split` eval function + +**URL:** https://github.com/opensearch-project/sql/pull/4918 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-08T21:48:13Z + +**State:** MERGED + +**Merged:** 2025-12-10T23:27:05Z + +**Changes:** +218 -0 (8 files) + + +## Description + +Backport 5dca84f73315aafee61878c279bc7ba904c18be1 from #4814. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4917: [Backport 2.19-dev] Add unified query transpiler API + +**URL:** https://github.com/opensearch-project/sql/pull/4917 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-08T20:02:24Z + +**State:** MERGED + +**Merged:** 2025-12-08T21:22:15Z + +**Changes:** +238 -39 (8 files) + + +## Description + +Backport d4daa34d83130429f44f14ecd703e070782c13c7 from #4871. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4916: [Backport 2.19-dev] Implement one-batch lookahead for index enumerators (#4345) + +**URL:** https://github.com/opensearch-project/sql/pull/4916 + +**Author:** @Swiddis + +**Created:** 2025-12-08T17:46:50Z + +**State:** MERGED + +**Merged:** 2025-12-10T09:36:39Z + +**Changes:** +468 -74 (21 files) + + +## Description + +### Description +Backport #4345 as there's no perf regression in OSB. In general, I'm confident that this helps it many query cases and doesn't overall hurt the general case. + +Had to convert the Scanner away from record types because of JDK compatibility, should be no functionality diff. 
+ +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @Swiddis on `prometheus/src/test/java/org/opensearch/sql/prometheus/storage/PrometheusStorageFactoryTest.java:133` + + +test.com just dropped out of DNS, leading to URI validation failures. Quick fix. + + +## General Comments + + +### @LantaoJin + + +@Swiddis I still see the compile errors in CI + + +--- + +# PR #4914: RexCall and RelDataType standardization for script push down + +**URL:** https://github.com/opensearch-project/sql/pull/4914 + +**Author:** @qianheng-aws + +**Created:** 2025-12-08T03:56:22Z + +**State:** MERGED + +**Merged:** 2025-12-12T02:48:16Z + +**Changes:** +303 -79 (43 files) + +**Labels:** `enhancement`, `backport-manually`, `backport-failed`, `pushdown`, `backport 2.19-dev` + + +## Description + +### Description +This PR continues https://github.com/opensearch-project/sql/pull/4795 work, it includes change: +1. Implement RexCall standardization by using `RexNormalize.normalize` +2. Implement RelDataType standardization by widening the type, see details in `RexStandardizer::widenType` +3. Support Sarg literal value by expanding `SEARCH` to conjunction expressions. +4. Support decimal literal value by downgrading to double value. + + +### Related Issues +Resolves #4757 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:109` + + +`helper.stack` here is an `ArrayDeque`, while used as a stack + + + +### @LantaoJin on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +Can you add an IT to verify the generated script with skip encoding via format=extended? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +We have test `testSkipScriptEncodingOnExtendedFormat`. Do you mean set `format=extended` for `agg_case_cannot_push`? + + +### @qianheng-aws on `integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml:9` + + +Added in test case `testRexStandardizationForScript` + +The script is formatted below, with several changes: +- `>=` -> `<=` +- SEARCH -> AND(...)
+- INT nullable=false -> BIGINT nullable=true +- CHAR[xxx] nullable=false -> VARCHAR nullable=true + +``` +{ + "op": { + "name": "CASE", + "kind": "CASE", + "syntax": "SPECIAL" + }, + "operands": [ + { + "op": { + "name": "<", + "kind": "LESS_THAN", + "syntax": "BINARY" + }, + "operands": [ + { + "dynamicParam": 0, + "type": { + "type": "BIGINT", + "nullable": true + } + }, + { + "dynamicParam": 1, + "type": { + "type": "BIGINT", + "nullable": true + } + } + ] + }, + { + "dynamicParam": 2, + "type": { + "type": "VARCHAR", + "nullable": true, + "precision": -1 + } + }, + { + "op": { + "name": "AND", + "kind": "AND", + "syntax": "BINARY" + }, + "operands": [ + { + "op": { + "name": "<=", + "kind": "LESS_THAN_OR_EQUAL", + "syntax": "BINARY" + }, + "operands": [ + { + "dynamicParam": 3, + "type": { + "type": "BIGINT", + "nullable": true + } + }, + { + "dynamicParam": 4, + "type": { + "type": "BIGINT", + "nullable": true + } + } + ] + }, + { + "op": { + "name": "<=", + "kind": "LESS_THAN_OR_EQUAL", + "syntax": "BINARY" + }, + "operands": [ + { + "dynamicParam": 5, + "type": { + "type": "BIGINT", + "nullable": true + } + }, + { + "dynamicParam": 6, + "type": { + "type": "BIGINT", + "nullable": true + } + } + ] + } + ] + }, + { + "dynamicParam": 7, + "type": { + "type": "VARCHAR", + "nullable": true, + "precision": -1 + } + }, + { + "dynamicParam": 8, + "type": { + "type": "VARCHAR", + "nullable": true, + "precision": -1 + } + } + ] +} +``` + + +### @qianheng-aws on `integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java:2136` + + +We should avoid reusing `FORMAT` parameter for `EXPLAIN OPTION` and add a new parameter for it. `EXPLAIN OPTION` affects the content of this API while `FORMAT` only affects the format like JSON, CSV, YAML. + +@coderabbitai Could you create an issue to track this? 
+ + +### @yuancu on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:67` + + +I encountered a similar problem -- `Sarg` can be serialized but can not be properly deserialized. I addressed it by replacing `Sarg sarg = sargFromJson((Map) literal)` with `Sarg sarg = sargFromJson((Map) literal, type)` in `ExtendedRelJson.java`. Maybe it helps. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:67` + + +The problem I met here is I cannot translate a Sarg literal into a java object by using `RexToLixTranslator.translateLiteral`. Thus, we cannot generate a parameter for it. + + +### @qianheng-aws on `opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java:67` + + +From the implementation of `RexToLixTranslator `, it seems Sarg can only be used as literal in RexNode expression but not DynamicParams. + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4912: Migrate PPL Documentation from RST to Markdown + +**URL:** https://github.com/opensearch-project/sql/pull/4912 + +**Author:** @kylehounslow + +**Created:** 2025-12-06T00:36:33Z + +**State:** MERGED + +**Merged:** 2025-12-09T21:52:43Z + +**Changes:** +20383 -16782 (157 files) + +**Labels:** `maintenance` + + +## Description + +## Description + +This PR converts all PPL documentation under `docs/user/ppl/` from reStructuredText to Markdown format, enabling automated export to the main [OpenSearch +documentation website](https://docs.opensearch.org/latest/about/) at [opensearch-project/documentation-website](https://github.com/opensearch-project/documentation-website). + +**Important Note: All existing doctest coverage has been migrated successfully and all existing GitHub-based documentation remains intact and fully functional. 
This change enables PPL documentation to appear on the main OpenSearch docs site while preserving the existing GitHub-based documentation experience. See demo below.** + +### Live Demo: +* GitHub docs: https://github.com/kylehounslow/sql/blob/feat/markdown-doctests/docs/user/ppl/index.md +* Main docs build: + +### Why? +* Enables automatic export of PPL docs to main OpenSearch documentation site. Currently changes are made via manual copy/paste resulting in stale, inconsistent docs. +* Improves discoverability of new/existing PPL commands and functionality. +* Improves developer experience with clean, copy-able PPL code snippets. + +### Related Issues +* https://github.com/opensearch-project/sql/issues/4854 +* https://github.com/opensearch-project/documentation-website/pull/11621 + +### Summary of Changes + +#### Documentation Format Migration +* Converted 70+ RST files to Markdown across all PPL documentation sections +* Updated `docs/category.json` to reflect new file structure +* Removed shell prefixes and output from code blocks for clean copy-paste +* **GitHub documentation experience unchanged** - same content, same navigation, rendered by GitHub's native Markdown support + +#### Doctest Changes +* Added `markdown_parser.py` to support doctest execution on Markdown code blocks +* Extended existing doctest framework to handle both RST and Markdown formats +* All existing tests pass with new parser + +#### Export Tooling +* `export_to_docs_website.py` - Jekyll-compatible export to inject proper front-matter, etc while preserving exact docs structure from `docs/user/ppl`. + * Note: main docs page has already promoted SQL and PPL section to top-level to accommodate this: https://github.com/opensearch-project/documentation-website/pull/11621 +* Conversion scripts (only run once. 
Kept for reference/re-use on remaining sql docs): + * `convert_rst_to_md.py` - Automated RST to Markdown conversion + * `fix_markdown_formatting.py` - Post-conversion cleanup and standardization to ensure proper Jekyll rendering + +### Future PR +* Migrate remaining `docs/user/sql` to markdown. + +### Check List + - [n/a] New functionality has javadoc added. + - [n/a] New functionality has a user manual doc added. +- [n/a] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @ahkcs + + +UT failure due to test.com is down +Fix: https://github.com/opensearch-project/sql/pull/4916/commits/afcfbd951d4b387106047c363f770fc43ad021c2 + + +--- + +# PR #4908: [Backport 2.19-dev] Error handling for dot-containing field names + +**URL:** https://github.com/opensearch-project/sql/pull/4908 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-05T00:41:37Z + +**State:** MERGED + +**Merged:** 2025-12-05T05:56:45Z + +**Changes:** +384 -7 (4 files) + + +## Description + +Backport 8126367a787b121e4467e3467d9f158a421290c0 from #4907. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4907: Error handling for dot-containing field names + +**URL:** https://github.com/opensearch-project/sql/pull/4907 + +**Author:** @ahkcs + +**Created:** 2025-12-04T17:57:22Z + +**State:** MERGED + +**Merged:** 2025-12-05T00:12:19Z + +**Changes:** +384 -7 (4 files) + +**Labels:** `PPL`, `backport 2.19-dev`, `bugFix` + + +## Description + +

    Summary

    +

    Resolves #4896: ArrayIndexOutOfBoundsException when querying an index containing malformed field names (e.g., ".", "..", ".a", "a.", "a..b") inside disabled object fields.

    +

    Disabled objects ("enabled": false) bypass field-name validation, allowing malformed names to be indexed and subsequently causing crashes in the SQL/PPL engines.

    +
    +

    Root Cause

    +

    OpenSearchExprValueFactory.JsonPath constructs field paths using:

    +
    rawPath.split("\\.");
    +
    +

    For malformed field names, split("\\.") behaves unexpectedly:

    + +Field Name | Result of split() | Issue +-- | -- | -- +".", ".." | [] (empty array) | dot-only → paths.get(0) crashes +".a" | ["", "a"] | leading dot → empty path segment +"a." | ["a"] (trailing empty removed) | trailing dot silently lost +"a..b" | ["a", "", "b"] | consecutive dots → empty segment + + + + + + + +## Summary by CodeRabbit + +## Release Notes + +* **Bug Fixes** + * Improved handling of queries on object fields containing malformed field names (dot-only names, leading/trailing dots, or consecutive dots). Invalid fields now return null while valid fields remain accessible. + +* **Documentation** + * Added documentation describing limitations when querying object fields with malformed field names and recommendations to avoid problematic naming patterns. + +✏️ Tip: You can customize this high-level summary in your review settings. + + + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +### @penghuo on `docs/user/ppl/limitations/limitations.rst:None` + + +> disabled object field, + +remove disabled. + +> PPL queries will return ``null`` for those specific fields. + +PPL ignore malformed fieldname. + + +### @penghuo on `docs/user/ppl/limitations/limitations.rst:None` + + +malformed names field are ignored. 
+ + +### @ahkcs on `docs/user/ppl/limitations/limitations.rst:None` + + +Updated + + +### @ahkcs on `docs/user/ppl/limitations/limitations.rst:None` + + +Updated + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4902: [Backport 2.19-dev] Support timeouts for Calcite queries (#4857) + +**URL:** https://github.com/opensearch-project/sql/pull/4902 + +**Author:** @Swiddis + +**Created:** 2025-12-03T21:42:17Z + +**State:** MERGED + +**Merged:** 2025-12-05T05:57:49Z + +**Changes:** +236 -45 (20 files) + +**Labels:** `enhancement` + + +## Description + +### Description +Backport #4857 to 2.19-dev + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @LantaoJin + + +@Swiddis Can you set an appropriate commit message when you enable automatic merging next time? + + +--- + +# PR #4901: Adjust CodeRabbit review config + +**URL:** https://github.com/opensearch-project/sql/pull/4901 + +**Author:** @ykmr1224 + +**Created:** 2025-12-03T21:28:19Z + +**State:** MERGED + +**Merged:** 2025-12-04T17:29:30Z + +**Changes:** +6 -5 (1 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Adjust CodeRabbit review config to enable auto review. +- Some more minor changes. + +### Related Issues +https://github.com/opensearch-project/sql/issues/4889 +https://github.com/opensearch-project/.github/issues/412 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4900: Remove doPrivileged call in Calcite script execution + +**URL:** https://github.com/opensearch-project/sql/pull/4900 + +**Author:** @Swiddis + +**Created:** 2025-12-03T19:35:46Z + +**State:** MERGED + +**Merged:** 2025-12-04T19:31:01Z + +**Changes:** +2 -7 (1 files) + +**Labels:** `maintenance`, `performance`, `calcite` + + +## Description + +### Description +Seems to delete a major perf bottleneck: we create an access controller context for every individual document we process in a script, which involves a lot of stack trace traversal and locking for accessing that context. Curious what the breakage is if I just delete this controller step, since it passes tests locally. At least for the query I was interested in, it reduced the runtime from 70 seconds (2mil documents) to 4. + +Better approach if we need it: move the privilege step to outside the core script loop. + +Measured from slow query (big5 dataset): +``` +source = big5 +| where `agent.name` = 'filebeat' +| where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` <= '2023-01-02 00:00:00' +| where `metrics.size` > 1000 and `metrics.size` != 5000 +| stats count(`agent.name`) +``` + +Before (70s/query): +image + +After (4s/query): +image + + +### Related Issues +N/A + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @dai-chen - APPROVED + + +Thanks for the changes! If I recall right, this is mostly for action whitelisted in plugin-security.policy? + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +### @songkant-aws + + +Grreat! I remember somehow 2.19 still needs it. But it was removed in OpenSearch since 3.x version. + + +--- + +# PR #4895: [Backport 2.19-dev] [BugFix] Fix Memory Exhaustion for Multiple Filtering Operations in PPL + +**URL:** https://github.com/opensearch-project/sql/pull/4895 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-02T21:41:24Z + +**State:** MERGED + +**Merged:** 2025-12-02T23:38:55Z + +**Changes:** +291 -126 (37 files) + + +## Description + +Backport 52fe8aa5bd19e8008e4c11bcbd2ea69946b5724e from #4841. + + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4891: [Backport 2.19-dev] Add hashCode() and equals() to the value class of ExprJavaType + +**URL:** https://github.com/opensearch-project/sql/pull/4891 + +**Author:** @app/opensearch-trigger-bot + +**Created:** 2025-12-02T00:46:19Z + +**State:** MERGED + +**Merged:** 2025-12-02T02:28:45Z + +**Changes:** +88 -1 (3 files) + + +## Description + +Backport 96370bfa573831d046db5f5c7029113460cbbb11 from #4885. 
+ + + +## Reviews + + +_No human reviews with comments_ + + +## Review Comments + + +_No inline code comments from humans_ + + +## General Comments + + +_No general comments from humans_ + + +--- + +# PR #4890: Add config for CodeRabbit review + +**URL:** https://github.com/opensearch-project/sql/pull/4890 + +**Author:** @ykmr1224 + +**Created:** 2025-12-01T23:59:46Z + +**State:** MERGED + +**Merged:** 2025-12-02T17:27:18Z + +**Changes:** +185 -0 (2 files) + +**Labels:** `infrastructure` + + +## Description + +### Description +- Add config files for CodeRabbit review (AI review assistant) + - Requested use of Amazon Q, but there were discussion in LF and they decided to use CodeRabbit. (https://github.com/opensearch-project/.github/issues/412) +- Disable auto review for now (enable once we establish configuration). + +### Related Issues +https://github.com/opensearch-project/sql/issues/4889 +https://github.com/opensearch-project/.github/issues/412 + +### Check List +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). + + + +## Reviews + + +### @RyanL1997 - APPROVED + + +Hi @ykmr1224, thanks for the change. LGTM. Left a minor question. + + +## Review Comments + + +### @RyanL1997 on `.coderabbit.yaml:22` + + +Nice, I think we can also start exercising this in our PR template when we initially creating PR. + + +### @RyanL1997 on `.rules/REVIEW_GUIDELINES.md:48` + + +Is this rule specific for command development? + + +### @ykmr1224 on `.rules/REVIEW_GUIDELINES.md:48` + + +No, this is generic for any code change in this repository. + + +### @Swiddis on `.coderabbit.yaml:69` + + +Not sure if I like this setting, maybe others like it though? 
+ +Usually when responding to AI comments the only audience is other reviewers (e.g. "this comment doesn't apply" or "implemented"), I'm not sure I see the value of the AI reply + + +## General Comments + + +_No general comments from humans_ + + +--- + diff --git a/.kiro/scripts/pr_collector/README.md b/.kiro/scripts/pr_collector/README.md new file mode 100644 index 0000000000..b7d543ccde --- /dev/null +++ b/.kiro/scripts/pr_collector/README.md @@ -0,0 +1,88 @@ +# PR Review Collector + +## Quick Start + +### Prerequisites +1. Install GitHub CLI: + ```bash + brew install gh # macOS + ``` + +2. Authenticate: + ```bash + gh auth login + ``` + +### Usage + +**Basic collection:** +```bash +python3 ~/.kiro/scripts/pr_collector/pr_collector.py \ + --repo opensearch-project/sql \ + --start-date 2025-01-01 \ + --end-date 2025-01-31 +``` + +**With filters:** +```bash +python3 ~/.kiro/scripts/pr_collector/pr_collector.py \ + --repo opensearch-project/sql \ + --start-date 2025-01-01 \ + --end-date 2025-01-31 \ + --state merged \ + --limit 20 +``` + +### Options +- `--repo`: Repository (owner/repo) - REQUIRED +- `--start-date`: Start date (YYYY-MM-DD) - REQUIRED +- `--end-date`: End date (YYYY-MM-DD) - REQUIRED +- `--state`: PR state filter (open, closed, merged, all) - default: all +- `--limit`: Maximum number of PRs to collect + +### Output +Data is saved to: `~/.kiro/resources/{repo-name}/YYYY-MM-DD-TO-YYYY-MM-DD.md` + +### Using with Kiro Agent +The `pr-collector` agent provides an interactive interface: +```bash +kiro-cli chat --agent pr-collector +``` + +Then simply describe what you want: +- "Collect PRs from opensearch-project/sql for January 2025" +- "Get merged PRs from last month, limit to 20" +- "Analyze the collected data" + +## What Gets Collected + +For each PR: +- Metadata (number, title, author, dates, state) +- Description +- Files changed (with additions/deletions) +- Reviews (with approval/rejection status) +- Review comments (inline code comments) +- 
General comments (discussion thread)
+- Labels and assignees
+
+## Troubleshooting
+
+**GitHub CLI not found:**
+```bash
+gh --version # Check if installed
+brew install gh # Install on macOS
+```
+
+**Authentication issues:**
+```bash
+gh auth status # Check auth status
+gh auth login # Login if needed
+```
+
+**Rate limiting:**
+- Use `--limit` to reduce API calls
+- Authenticated requests have higher limits
+- Break large collections into smaller date ranges
+
+---
+**Version:** 1.0.0
diff --git a/.kiro/scripts/pr_collector/pr_collector.py b/.kiro/scripts/pr_collector/pr_collector.py
new file mode 100755
index 0000000000..15cf766446
--- /dev/null
+++ b/.kiro/scripts/pr_collector/pr_collector.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""
+GitHub PR Review Collector
+Collects PR metadata, reviews, and comments for training PR review agents.
+"""
+
+import os
+import re
+import json
+import argparse
+from datetime import datetime
+from typing import List, Dict, Optional
+import subprocess
+
+# Upper bound passed to `gh pr list`; PRs beyond this cap are never seen.
+PR_LIST_LIMIT = 1000
+
+# "bot" as a delimited token of a login ("dependabot[bot]", "ci-bot"), so human
+# logins that merely contain those letters (e.g. "abbott") are not filtered out.
+_BOT_TOKEN = re.compile(r"(^|[-_\[])bot($|[-_\]])")
+
+
+def run_gh_command(cmd: List[str]) -> str:
+    """Execute a GitHub CLI command and return its stdout.
+
+    Re-raises subprocess.CalledProcessError after printing the command and stderr.
+    """
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        return result.stdout
+    except subprocess.CalledProcessError as e:
+        print(f"Error running command: {' '.join(cmd)}")
+        print(f"Error: {e.stderr}")
+        raise
+
+
+def gh_api_list(path: str) -> List[Dict]:
+    """Fetch every page of a list-valued REST endpoint via `gh api --paginate`.
+
+    The REST API pages list results (30 items by default), so an unpaginated
+    call silently drops the rest of a long review thread. `--jq '.[]'` emits
+    one JSON value per item; the values are decoded as a stream so the exact
+    layout of the output (one per line or pretty-printed) does not matter.
+    """
+    output = run_gh_command(["gh", "api", "--paginate", path, "--jq", ".[]"])
+    decoder = json.JSONDecoder()
+    items = []
+    pos = 0
+    while True:
+        # raw_decode() does not skip leading whitespace itself.
+        while pos < len(output) and output[pos].isspace():
+            pos += 1
+        if pos >= len(output):
+            return items
+        item, pos = decoder.raw_decode(output, pos)
+        items.append(item)
+
+
+def is_bot(user: Optional[Dict]) -> bool:
+    """Return True when a REST `user` object looks like a bot/automation account."""
+    if not user:
+        return False  # no user object (presumably a deleted account); keep the comment
+    login = (user.get('login') or '').lower()
+    return (
+        user.get('type') == 'Bot'
+        or 'coderabbit' in login
+        or _BOT_TOKEN.search(login) is not None
+    )
+
+
+def get_pr_list(repo: str, start_date: str, end_date: str, state: str = "all") -> List[Dict]:
+    """Return the PRs of `repo` whose creation time lies in [start_date, end_date].
+
+    Dates are ISO-8601 strings; a trailing 'Z' is accepted. The range filter is
+    applied client-side to the (at most PR_LIST_LIMIT) PRs `gh pr list` returns.
+    """
+    cmd = [
+        "gh", "pr", "list",
+        "--repo", repo,
+        "--state", state,
+        "--limit", str(PR_LIST_LIMIT),
+        "--json", "number,title,author,createdAt,closedAt,mergedAt,state,url"
+    ]
+
+    output = run_gh_command(cmd)
+    prs = json.loads(output)
+    if len(prs) >= PR_LIST_LIMIT:
+        print(f"Warning: PR listing hit the {PR_LIST_LIMIT} cap; some PRs in the range may be missing")
+
+    # Filter by date range
+    start = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
+    end = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
+
+    filtered_prs = []
+    for pr in prs:
+        pr_date = datetime.fromisoformat(pr['createdAt'].replace('Z', '+00:00'))
+        if start <= pr_date <= end:
+            filtered_prs.append(pr)
+
+    return filtered_prs
+
+
+def get_pr_details(repo: str, pr_number: int) -> Dict:
+    """Get detailed PR information including reviews and comments.
+
+    Returns the `gh pr view` JSON extended with the keys 'reviews',
+    'review_comments', 'issue_comments' and 'files' (raw REST API objects).
+    """
+    cmd = [
+        "gh", "pr", "view", str(pr_number),
+        "--repo", repo,
+        "--json", "number,title,body,author,createdAt,closedAt,mergedAt,state,url,additions,deletions,changedFiles,labels,assignees,reviewRequests"
+    ]
+    pr_data = json.loads(run_gh_command(cmd))
+
+    # Reviews (approve / request-changes / comment summaries)
+    pr_data['reviews'] = gh_api_list(f"/repos/{repo}/pulls/{pr_number}/reviews")
+    # Review comments (inline code comments)
+    pr_data['review_comments'] = gh_api_list(f"/repos/{repo}/pulls/{pr_number}/comments")
+    # Issue comments (general PR comments)
+    pr_data['issue_comments'] = gh_api_list(f"/repos/{repo}/issues/{pr_number}/comments")
+    # Files changed
+    pr_data['files'] = gh_api_list(f"/repos/{repo}/pulls/{pr_number}/files")
+
+    return pr_data
+
+
+def format_pr_markdown(pr_data: Dict) -> str:
+    """Format PR data as markdown, keeping only human-written review content."""
+    md = []
+
+    # Header
+    md.append(f"# PR #{pr_data['number']}: {pr_data['title']}\n")
+    md.append(f"**URL:** {pr_data['url']}\n")
+    md.append(f"**Author:** @{pr_data['author']['login']}\n")
+    md.append(f"**Created:** {pr_data['createdAt']}\n")
+    md.append(f"**State:** {pr_data['state']}\n")
+
+    if pr_data.get('mergedAt'):
+        md.append(f"**Merged:** {pr_data['mergedAt']}\n")
+    elif pr_data.get('closedAt'):
+        md.append(f"**Closed:** {pr_data['closedAt']}\n")
+
+    md.append(f"**Changes:** +{pr_data.get('additions', 0)} -{pr_data.get('deletions', 0)} ({pr_data.get('changedFiles', 0)} files)\n")
+
+    # Labels
+    if pr_data.get('labels'):
+        labels = [f"`{label['name']}`" for label in pr_data['labels']]
+        md.append(f"**Labels:** {', '.join(labels)}\n")
+
+    # Assignees
+    if pr_data.get('assignees'):
+        assignees = [f"@{a['login']}" for a in pr_data['assignees']]
+        md.append(f"**Assignees:** {', '.join(assignees)}\n")
+
+    # Description - filter out checklist sections
+    md.append("\n## Description\n")
+    # The key can be present with a null/empty value, so a .get() default is not enough.
+    body = pr_data.get('body')
+    if body:
+        # Remove checklist items (lines starting with - [ ] or - [x]) and the
+        # blank line that directly follows them.
+        filtered_lines = []
+        in_checklist = False
+        for line in body.split('\n'):
+            stripped = line.strip()
+            if stripped.startswith(('- [ ]', '- [x]', '- [X]')):
+                in_checklist = True
+                continue
+            if in_checklist and not stripped:
+                in_checklist = False
+                continue
+            in_checklist = False
+            filtered_lines.append(line)
+        body = '\n'.join(filtered_lines).strip()
+    md.append(body or '_No description provided_')
+    md.append("\n")
+
+    # Skip Files Changed section
+
+    # Reviews - humans only, and only reviews that carry a comment body
+    md.append("\n## Reviews\n")
+    has_reviews = False
+    for review in pr_data.get('reviews', []):
+        user = review.get('user')
+        if is_bot(user):
+            continue
+        if not (review.get('body') or '').strip():
+            continue
+        has_reviews = True
+        reviewer = user['login'] if user else 'ghost'
+        md.append(f"\n### @{reviewer} - {review['state']}\n")
+        md.append(f"\n{review['body']}\n")
+
+    if not has_reviews:
+        md.append("\n_No human reviews with comments_\n")
+
+    # Review Comments (inline code comments) - bots filtered out
+    md.append("\n## Review Comments\n")
+    has_review_comments = False
+    for comment in pr_data.get('review_comments', []):
+        user = comment.get('user')
+        if is_bot(user):
+            continue
+        author = user['login'] if user else 'ghost'
+        # 'line' can be present but null (it showed up as "path:None" in generated
+        # output); fall back to the line the comment was originally attached to.
+        line = comment.get('line') or comment.get('original_line') or 'N/A'
+
+        has_review_comments = True
+        md.append(f"\n### @{author} on `{comment['path']}:{line}`\n")
+        md.append(f"\n{comment.get('body') or ''}\n")
+
+    if not has_review_comments:
+        md.append("\n_No inline code comments from humans_\n")
+
+    # Issue Comments (general PR comments) - bots filtered out
+    md.append("\n## General Comments\n")
+    has_general_comments = False
+    for comment in pr_data.get('issue_comments', []):
+        user = comment.get('user')
+        if is_bot(user):
+            continue
+        author = user['login'] if user else 'ghost'
+
+        has_general_comments = True
+        md.append(f"\n### @{author}\n")
+        md.append(f"\n{comment.get('body') or ''}\n")
+
+    if not has_general_comments:
+        md.append("\n_No general comments from humans_\n")
+
+    return '\n'.join(md)
+
+
+def save_pr_data(repo: str, start_date: str, end_date: str, pr_data_list: List[Dict]):
+    """Write all PRs to `<resources>/<repo-name>/<start>-TO-<end>.md` and return the path.
+
+    The workspace `.kiro` directory is used when it exists in the current
+    directory; otherwise the file goes under `~/.kiro`.
+    """
+    repo_name = repo.split('/')[-1]
+    date_range = f"{start_date[:10]}-TO-{end_date[:10]}"
+
+    # Prefer workspace if .kiro exists in current directory
+    if os.path.exists(os.path.join(os.getcwd(), ".kiro")):
+        output_dir = os.path.join(os.getcwd(), ".kiro", "resources", repo_name)
+    else:
+        output_dir = os.path.expanduser(f"~/.kiro/resources/{repo_name}")
+
+    os.makedirs(output_dir, exist_ok=True)
+
+    output_file = os.path.join(output_dir, f"{date_range}.md")
+
+    # PR text is arbitrary Unicode; do not depend on the platform default encoding.
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(f"# PR Review Data: {repo}\n")
+        f.write(f"**Date Range:** {start_date[:10]} to {end_date[:10]}\n")
+        f.write(f"**Total PRs:** {len(pr_data_list)}\n")
+        f.write(f"**Generated:** {datetime.now().isoformat()}\n")
+        f.write("\n---\n\n")
+
+        for pr_data in pr_data_list:
+            f.write(format_pr_markdown(pr_data))
+            f.write("\n\n---\n\n")
+
+    print(f"✓ Saved PR data to: {output_file}")
+    return output_file
+
+
+def main():
+    """Parse CLI arguments, collect the matching PRs and write the markdown report."""
+    parser = argparse.ArgumentParser(description="Collect GitHub PR review data")
+    parser.add_argument("--repo", required=True, help="Repository (owner/repo)")
+    parser.add_argument("--start-date", required=True, help="Start date (YYYY-MM-DD)")
+    parser.add_argument("--end-date", required=True, help="End date (YYYY-MM-DD)")
+    parser.add_argument("--state", default="all", choices=["open", "closed", "merged", "all"], help="PR state filter")
+    parser.add_argument("--limit", type=int, help="Limit number of PRs to collect")
+
+    args = parser.parse_args()
+
+    # Convert dates to ISO format
+    start_date = f"{args.start_date}T00:00:00Z"
+    end_date = f"{args.end_date}T23:59:59Z"
+
+    print(f"Collecting PRs from {args.repo}...")
+    print(f"Date range: {args.start_date} to {args.end_date}")
+
+    # Get PR list
+    prs = get_pr_list(args.repo, start_date, end_date, args.state)
+    print(f"Found {len(prs)} PRs in date range")
+
+    if args.limit:
+        prs = prs[:args.limit]
+        print(f"Limited to {len(prs)} PRs")
+
+    # Collect detailed data for each PR
+    pr_data_list = []
+    for i, pr in enumerate(prs, 1):
+        print(f"Collecting PR #{pr['number']} ({i}/{len(prs)})...")
+        try:
+            pr_details = get_pr_details(args.repo, pr['number'])
+            pr_data_list.append(pr_details)
+        except Exception as e:
+            print(f"Error collecting PR #{pr['number']}: {e}")
+            continue
+
+    # Save to file
+    output_file = save_pr_data(args.repo, start_date, end_date, pr_data_list)
+    print(f"\n✓ Collection complete!")
+    print(f"✓ Collected {len(pr_data_list)} PRs")
+    print(f"✓ Output: {output_file}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.kiro/steering/pr-collector.md b/.kiro/steering/pr-collector.md
new file mode 100644
index 0000000000..b4bd29c2ef
--- /dev/null
+++ b/.kiro/steering/pr-collector.md
@@ -0,0 +1,137 @@
+---
+inclusion: manual
+---
+
+# PR Review Collector
+
+## Quick Commands
+
+When user asks to collect PR data, IMMEDIATELY execute:
+
+```bash
+python3 .kiro/scripts/pr_collector/pr_collector.py \
+  --repo REPO_NAME \
+  --start-date YYYY-MM-DD \
+  --end-date YYYY-MM-DD \
+  [--state merged|open|closed|all] \
+  [--limit NUMBER]
+```
+
+**Example:**
+```bash
+python3 .kiro/scripts/pr_collector/pr_collector.py \
+  --repo opensearch-project/sql \
+  --start-date 2025-12-01 \
+  --end-date 2025-12-31
+```
+
+## Output Location
+Data saved to: `.kiro/resources/{repo-name}/YYYY-MM-DD-TO-YYYY-MM-DD.md`
+
+## Prerequisites Check
+
+Before running, verify:
+```bash
+# Check GitHub CLI
+gh --version
+
+# Check authentication
+gh auth status
+```
+
+If not installed: `brew install gh` (macOS)
+If not authenticated: `gh auth login`
+
+## Common Use Cases
+
+### Collect all PRs for a month
+```bash
+python3 .kiro/scripts/pr_collector/pr_collector.py \
+  --repo opensearch-project/sql \
+  --start-date 2025-12-01 \
+  --end-date 2025-12-31
+```
+
+### Collect only merged PRs, limited to 20
+```bash
+python3 .kiro/scripts/pr_collector/pr_collector.py \
+ --repo opensearch-project/sql \ + --start-date 2025-12-01 \ + --end-date 2025-12-31 \ + --state merged \ + --limit 20 +``` + +### Collect recent PRs (last 30 days) +```bash +python3 .kiro/scripts/pr_collector/pr_collector.py \ + --repo opensearch-project/sql \ + --start-date 2025-12-22 \ + --end-date 2026-01-21 +``` + +## What Gets Collected + +For each PR: +- Metadata (number, title, author, dates, state, URL) +- Description (checklist items filtered out) +- Reviews (human reviewers only, with full comment content) +- Review comments (inline code comments from humans only, with file/line context) +- General comments (discussion thread from humans only) +- Labels and assignees + +**Filtered Out:** +- Checklist items (- [ ] and - [x] lines) +- Files changed section +- CodeRabbit bot reviews and comments +- Timestamps (only reviewer name and content shown) + +## After Collection + +Once data is collected, you can: + +1. **Read the output file:** +```bash +cat .kiro/resources/sql/2025-12-01-TO-2025-12-31.md +``` + +2. **Analyze reviewers** (review headings are written as `### @login - STATE`): +```bash +grep -E "^### @[^ ]+ - [A-Z_]+$" .kiro/resources/sql/2025-12-01-TO-2025-12-31.md | sort | uniq -c | sort -rn +``` + +3. **Count PRs:** +```bash +grep "^# PR #" .kiro/resources/sql/2025-12-01-TO-2025-12-31.md | wc -l +``` + +4. 
**Find specific reviewer's comments:** +```bash +grep -A 5 -E "^### @USERNAME( |$)" .kiro/resources/sql/2025-12-01-TO-2025-12-31.md +``` + +## Troubleshooting + +**Error: gh: command not found** +→ Install GitHub CLI: `brew install gh` + +**Error: authentication required** +→ Login: `gh auth login` + +**Error: API rate limit exceeded** +→ Use `--limit` to reduce requests or wait an hour + +**No PRs found** +→ Check date range and repo name are correct + +## Tips + +- Start with `--limit 10` to test +- Use `--state merged` to focus on completed PRs +- Break large date ranges into monthly chunks +- Authenticated requests have higher API limits (5000/hour vs 60/hour) + +--- + +**Script Location:** `.kiro/scripts/pr_collector/pr_collector.py` +**Version:** 1.0.0 From 268f77b3d97437245d5072c92f86725ce08eac4e Mon Sep 17 00:00:00 2001 From: Peng Huo Date: Wed, 28 Jan 2026 15:55:36 -0800 Subject: [PATCH 2/6] Fix #5067: Report syntax error for duplicate source/index keywords When a PPL query contains duplicate 'source' or 'index' keywords (e.g., 'source source=index_name'), the parser was accepting it as valid syntax, treating the first keyword as a search expression. This caused confusing errors later when OpenSearch tried to expand fields. This fix adds validation in AstBuilder.visitSearchFrom() to detect when reserved keywords 'source' or 'index' appear as search expressions before the fromClause. It now throws a clear SyntaxCheckException with a helpful error message suggesting the correct syntax.
Signed-off-by: Peng Huo
---
 .../opensearch/sql/ppl/parser/AstBuilder.java | 21 ++++
 .../sql/ppl/antlr/Issue5067Test.java          | 95 +++++++++++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java

diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
index 7ac29faf4c..49518bbf9e 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
@@ -187,6 +187,9 @@ public UnresolvedPlan visitSubSearch(OpenSearchPPLParser.SubSearchContext ctx) {
   /** Search command. */
   @Override
   public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) {
+    // Validate search expressions don't contain reserved keywords
+    validateSearchExpressions(ctx.searchExpression());
+
     if (ctx.searchExpression().isEmpty()) {
       return visitFromClause(ctx.fromClause());
     } else {
@@ -218,6 +221,24 @@ public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) {
     }
   }
 
+  /**
+   * Rejects a bare SOURCE or INDEX keyword (any letter case, locale-independent) used as a search
+   * expression; both belong to the fromClause. Throws {@link SyntaxCheckException} on a match.
+   */
+  private void validateSearchExpressions(
+      List<OpenSearchPPLParser.SearchExpressionContext> searchExprs) {
+    for (OpenSearchPPLParser.SearchExpressionContext expr : searchExprs) {
+      String text = expr.getText().toLowerCase(java.util.Locale.ROOT);
+      if (text.equals("source") || text.equals("index")) {
+        throw new SyntaxCheckException(
+            String.format(
+                "Unexpected keyword '%s' before source clause. "
+                    + "Did you mean '%s=' or 'search %s='?",
+                text, text, text));
+      }
+    }
+  }
+
   /**
    * Describe command.<br>
    * Current logic separates table and metadata info about table by adding MAPPING_ODFE_SYS_TABLE as
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java
new file mode 100644
index 0000000000..c7e6f585f3
--- /dev/null
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java
@@ -0,0 +1,95 @@
+package org.opensearch.sql.ppl.antlr;
+
+import static org.junit.Assert.assertThrows;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.opensearch.sql.common.antlr.SyntaxCheckException;
+import org.opensearch.sql.common.setting.Settings;
+import org.opensearch.sql.ppl.parser.AstBuilder;
+import org.opensearch.sql.ppl.parser.AstStatementBuilder;
+
+/**
+ * Unit test to reproduce issue #5067
+ * https://github.com/opensearch-project/sql/issues/5067
+ */
+public class Issue5067Test {
+
+  private final Settings settings = Mockito.mock(Settings.class);
+
+  @Test
+  public void testDuplicateSourceKeywordShouldThrowSyntaxError() {
+    // This query has duplicate "source" keyword: "source source=..."
+    // Expected: SyntaxCheckException
+    // Actual (bug): Query gets parsed and later throws QueryShardException
+    String query = "source source=test-index | head 10";
+    PPLSyntaxParser parser = new PPLSyntaxParser();
+    ParseTree tree = parser.parse(query);
+
+    assertThrows(
+        SyntaxCheckException.class,
+        () -> tree.accept(
+            new AstStatementBuilder(
+                new AstBuilder(query, settings),
+                AstStatementBuilder.StatementBuilderContext.builder().build())));
+  }
+
+  @Test
+  public void testCorrectSourceSyntaxShouldParse() {
+    // This is the correct syntax
+    String query = "source=test-index | head 10";
+    PPLSyntaxParser parser = new PPLSyntaxParser();
+    ParseTree tree = parser.parse(query);
+
+    // Should not throw
+    tree.accept(
+        new AstStatementBuilder(
+            new AstBuilder(query, settings),
+            AstStatementBuilder.StatementBuilderContext.builder().build()));
+  }
+
+  @Test
+  public void testDuplicateIndexKeywordShouldThrowSyntaxError() {
+    // Similar issue with INDEX keyword
+    String query = "index index=test-index | head 10";
+    PPLSyntaxParser parser = new PPLSyntaxParser();
+    ParseTree tree = parser.parse(query);
+
+    assertThrows(
+        SyntaxCheckException.class,
+        () -> tree.accept(
+            new AstStatementBuilder(
+                new AstBuilder(query, settings),
+                AstStatementBuilder.StatementBuilderContext.builder().build())));
+  }
+
+  @Test
+  public void testSearchWithDuplicateSourceShouldThrowSyntaxError() {
+    // With explicit SEARCH keyword
+    String query = "search source source=test-index | head 10";
+    PPLSyntaxParser parser = new PPLSyntaxParser();
+    ParseTree tree = parser.parse(query);
+
+    assertThrows(
+        SyntaxCheckException.class,
+        () -> tree.accept(
+            new AstStatementBuilder(
+                new AstBuilder(query, settings),
+                AstStatementBuilder.StatementBuilderContext.builder().build())));
+  }
+
+  @Test
+  public void testSearchingForLiteralSourceValueShouldWork() {
+    // Searching for the literal word "source" in a field should still work
+    String query = "source=test-index field=\"source\" | head 10";
+    PPLSyntaxParser parser = new PPLSyntaxParser();
+    ParseTree tree = parser.parse(query);
+
+    // Should not throw - this is a valid search for the literal word "source"
+    tree.accept(
+        new AstStatementBuilder(
+            new AstBuilder(query, settings),
+            AstStatementBuilder.StatementBuilderContext.builder().build()));
+  }
+}
From 62fe41c0a9517d5ed3bb83619b2750e13e336bad Mon Sep 17 00:00:00 2001
From: Peng Huo
Date: Wed, 28 Jan 2026 15:56:43 -0800
Subject: [PATCH 3/6] kiro agents

Signed-off-by: Peng Huo
---
 .kiro/agents/README.md               | 103 ++++++++++++++++++++++++
 .kiro/agents/ppl-doctor.json         |  34 ++++++++
 .kiro/agents/ppl-doctor.prompt.md    | 116 +++++++++++++++++++++++++++
 .kiro/agents/rca-fix-agent.json      |  29 +++++++
 .kiro/agents/rca-fix-agent.prompt.md |  74 +++++++++++++++++
 5 files changed, 356 insertions(+)
 create mode 100644 .kiro/agents/README.md
 create mode 100644 .kiro/agents/ppl-doctor.json
 create mode 100644 .kiro/agents/ppl-doctor.prompt.md
 create mode 100644 .kiro/agents/rca-fix-agent.json
 create mode 100644 .kiro/agents/rca-fix-agent.prompt.md

diff --git a/.kiro/agents/README.md b/.kiro/agents/README.md
new file mode 100644
index 0000000000..0f3cb84391
--- /dev/null
+++ b/.kiro/agents/README.md
@@ -0,0 +1,103 @@
+# ppl-doctor agents
+
+This folder contains the Kiro CLI agents for the PPL bug-fixing workflow.
+ +## Prerequisites +- OpenSearch + SQL plugin built from this repo +- A running local test cluster when reproducing bugs + +Start a local cluster: +```bash +./gradlew opensearch-sql:run +``` + +## Available agents +- `ppl-doctor` (orchestrator entry point) +- `issue-analyzer-agent` +- `reproducer-agent` +- `root-cause-agent` +- `fix-implementer-agent` +- `pr-commit-agent` + +List agents discovered in this repo: +```bash +kiro-cli agent list +``` + +## Orchestrator usage (recommended) +Run the orchestrator with a GitHub issue link: +```bash +kiro-cli chat --agent ppl-doctor --trust-all-tools +``` +Then provide a request envelope, for example: +```text +stage: issue-analyzer +issue_url: https://github.com/opensearch-project/sql/issues/5055 +context: + repo: opensearch-project/sql + local_repo_root: /Users/penghuo/oss/os-ppl +inputs: + sample_data_paths: [] + query: "" + expected: "" +constraints: + avoid_legacy: true + max_source_files: 30 +``` + +The orchestrator will delegate to sub-agents based on the stage. + +## Slack notifications +- The orchestrator uses santos-slack-mcp-server to notify channel `C0ABN6XRY7N` + when user input is required (for example: missing repro details). + +## Agent-by-agent usage (manual) +You can run each sub-agent directly using the same envelope format. + +Issue analysis: +```bash +kiro-cli chat --agent issue-analyzer-agent --trust-all-tools +``` + +Reproduce a bug: +```bash +kiro-cli chat --agent reproducer-agent --trust-all-tools +``` + +Root-cause analysis: +```bash +kiro-cli chat --agent root-cause-agent --trust-all-tools +``` + +Fix + test: +```bash +kiro-cli chat --agent fix-implementer-agent --trust-all-tools +``` + +Create PR + track review: +```bash +kiro-cli chat --agent pr-commit-agent --trust-all-tools +``` + +## Response envelope +Each agent responds with: +```text +stage: +status: +summary: +artifacts: + files_changed: [...] + commands_run: [...] + tests_run: [...] 
+notes: + risks: + followups: +``` + +## Tips +- If a repro fails, capture exact OpenSearch version and mappings in your input. +- Keep scope minimal: if a fix would touch more than 30 non-test files, pause and confirm. +- For issue #5055, the repro data and queries are already in the issue body. + +## TODO +- Slack integration is tracked in `TODO.md`. diff --git a/.kiro/agents/ppl-doctor.json b/.kiro/agents/ppl-doctor.json new file mode 100644 index 0000000000..b32ba0f05c --- /dev/null +++ b/.kiro/agents/ppl-doctor.json @@ -0,0 +1,34 @@ +{ + "name": "ppl-doctor", + "prompt": "file://./ppl-doctor.prompt.md", + "description": "Orchestrator for PPL intake/repro/PR; delegates RCA+fix to rca-fix-agent.", + "includeMcpJson": true, + "tools": [ + "@builtin", + "@github", + "@santos-slack-mcp-server" + ], + "toolAliases": {}, + "allowedTools": [ + "read", + "write", + "shell", + "@github/*", + "@santos-slack-mcp-server/*" + ], + "resources": [ + "file:///Users/penghuo/oss/os-ppl/README.md", + "file:///Users/penghuo/oss/os-ppl/DEVELOPER_GUIDE.rst", + "file:///Users/penghuo/oss/os-ppl/CONTRIBUTING.md", + "file:///Users/penghuo/oss/os-ppl/docs/dev" + ], + "hooks": {}, + "toolsSettings": { + "write": { + "allowedPaths": [ + "/Users/penghuo/oss/os-ppl/**" + ] + } + }, + "model": "claude-sonnet-4.5-1m" +} diff --git a/.kiro/agents/ppl-doctor.prompt.md b/.kiro/agents/ppl-doctor.prompt.md new file mode 100644 index 0000000000..a3858bb9c6 --- /dev/null +++ b/.kiro/agents/ppl-doctor.prompt.md @@ -0,0 +1,116 @@ +# ppl-doctor + +You are the top-level orchestration agent for fixing OpenSearch PPL bugs in the +opensearch-project/sql repository. Follow the workflow below and keep the user +informed at each gate, emit status, evidence, next action. The items marked **MANDATORY** are hard requirements and +must not be skipped or re-ordered. You delegate coding work to `rca-fix-agent` +when triggered; otherwise you may inline quick tasks. 
+You can read local repo at /Users/penghuo/oss/os-ppl, run shell commands, and use github and slack MCP. +If a required tool is unavailable: stop at the current gate and request the minimum user action (e.g., provide issue link / run commands and paste output). + +## Goals +- Select a valid bug issue (or validate the provided issue link). +- Reproduce the issue using the sample data and query from the issue. +- If not reproducible, ask for clarifications and notify a configured Slack channel. +- If reproducible, perform root cause analysis, implement a fix, and verify via tests. +- Avoid regressions and keep changes minimal. +- Create a PR with the repo template and track review feedback. +- If no review response in 12 hours, ping reviewers in Slack. +- If any stage needs user input, send a Slack notification to channel C0ABN6XRY7N. + +## Inputs +- Preferred: a GitHub issue link from opensearch-project/sql. +- If no link is provided, auto-select an open issue labeled `bug` with no open PR. + +## Workflow (MANDATORY GATES) +1) Issue intake and validation (**MANDATORY**) + - If the user provides an issue link, confirm it is in opensearch-project/sql, is open, and has the `bug` label. + - If no issue link is provided, query for open `bug` issues with no linked/open PRs. If tooling is missing, ask the user to provide a link. + - Confirm no one is actively working on it (linked PRs, assignee set, or recent comments in the last 14 days saying “working on this”). + +2) Reproduce EXACTLY as described (**MANDATORY**) + - Extract sample data, mappings, and queries from the issue. + - Follow the user-specified scenario **verbatim** (field counts, query text, cluster settings). Do not substitute smaller datasets unless the user approves. + - Create a yamlRestTest include (index creation + ingest + query). + - Use `./gradlew opensearch-sql:run` to launch a local test cluster. + - Run the query and compare actual vs expected output. 
+ +3) If reproduction fails (**MANDATORY PAUSE**) + - Draft a focused clarification question (versions, mappings, sample data, settings). + - Send a short Slack notification with the question and the repro steps attempted to channel C0ABN6XRY7N using santos-slack-mcp-server. + - halt after producing the clarification + Slack draft and wait for confirmation before proceeding. + +4) RCA + Fix + Verification (**DELEGATE TO rca-fix-agent**) + - Triggers for delegation to rca-fix-agent + +5) PR + review follow-up + - Create a PR with a description following the repo template. + - Track reviewer feedback; if no response in 12 hours, ping reviewers in Slack. + +## Output format +Provide a short report with: +- Issue link and selection rationale +- Repro steps and outcome +- Root cause summary +- Fix summary and files changed +- Tests run (and results) +- Open questions, if any + +## repo template +``` +### Description +PR description + +### Related Issues +{related_issues} +``` + +## Delegation +- Only one implementation sub-agent: `rca-fix-agent` (claude-opus-4.5) for RCA, fail-first test, fix, and verification. +- Default: inline small tasks. Delegate when any trigger in step 4 is true. +- Slack notifications stay in this top agent. + +### Delegation format +Send a compact request envelope to sub-agents: +```text +stage: +issue_url: +context: + repo: opensearch-project/sql + local_repo_root: /Users/penghuo/oss/os-ppl +inputs: + sample_data_paths: [...] + query: + expected: +constraints: + avoid_legacy: true + max_source_files: 30 +``` + +Expect this response envelope: +```text +stage: +status: +summary: +artifacts: + files_changed: [...] + commands_run: [...] + tests_run: [...] +notes: + risks: + followups: +``` + +## Tools +- Developer Guide, /Users/penghuo/oss/os-ppl//DEVELOPER_GUIDE.rst +- Run yamlRestTest, `./gradlew :integ-test:yamlRestTest` + +## Constraints +- PPL related code in ppl, plugin, core, common, opensearch, protcol module. 
+- Avoid touching legacy, sql, async-query, aysnc-query-core, datasources, direct-query, direct-query-core, language-grammear modules unless explicitly required. +- Create PRs only for the selected issue and keep the scope minimal. +- Prefer safe, reversible commands. + +## Slack +- Use santos-slack-mcp-server to send messages to channel C0ABN6XRY7N. +- Trigger on any `needs-info` from sub-agents or when clarification is required. diff --git a/.kiro/agents/rca-fix-agent.json b/.kiro/agents/rca-fix-agent.json new file mode 100644 index 0000000000..aa5773aa6c --- /dev/null +++ b/.kiro/agents/rca-fix-agent.json @@ -0,0 +1,29 @@ +{ + "name": "rca-fix-agent", + "description": "Root-cause analysis plus fix implementation and verification for PPL bugs.", + "prompt": "file://./rca-fix-agent.prompt.md", + "resources": [ + "file:///Users/penghuo/oss/os-ppl/README.md", + "file:///Users/penghuo/oss/os-ppl/docs/dev", + "file:///Users/penghuo/oss/os-ppl/integ-test" + ], + "includeMcpJson": true, + "tools": [ + "@builtin", + "@github" + ], + "allowedTools": [ + "read", + "write", + "shell", + "@github/*" + ], + "toolsSettings": { + "write": { + "allowedPaths": [ + "/Users/penghuo/oss/os-ppl/**" + ] + } + }, + "model": "claude-opus-4.5" +} diff --git a/.kiro/agents/rca-fix-agent.prompt.md b/.kiro/agents/rca-fix-agent.prompt.md new file mode 100644 index 0000000000..7bf873b031 --- /dev/null +++ b/.kiro/agents/rca-fix-agent.prompt.md @@ -0,0 +1,74 @@ +# rca-fix-agent + +You own root cause analysis, the fail-first test, the fix, and verification for +OpenSearch PPL bugs. Evidence comes before theory; tests gate every claim. + +## Responsibilities +- **Inputs:** receive validated issue context, repro script/data, and expected vs actual. +- **Fail-first (MANDATORY):** Add/execute a yamlRestTest or equivalent that matches the user/issue scenario and confirm it fails before coding. 
+- **RCA (MANDATORY TEST-BEFORE-THEORY):** Identify whether the failure is parser/analyzer/planner/execution. For each hypothesis, run the yamlRestTest before claiming root cause. Retract immediately if evidence contradicts. +- **Fix:** Implement the smallest code change tied to the proven cause; avoid legacy modules unless required. +- **Verification (MANDATORY):** Re-run the new yamlRestTest plus targeted unit/integration tests. If proposing alternative tests, obtain orchestrator approval first. +- **Artifacts:** Return changed files, commands run, and test results. Note risks/regressions. + +## Code Fix - Implement the minimal fix for the confirmed failing test. +### Constraints (hard): +- Fix must be the smallest production code change that makes the failing test pass. +- No refactors, renames, formatting-only changes, or behavior changes unrelated to the failing scenario. +- Touch ≤10 files and ≤300 LOC. If you must exceed, stop and ask. +- Avoid legacy modules. If unavoidable, stop and report: + - which legacy files must change + - why the failing path requires it + - what non-legacy alternatives you tried +- PPL-related code in ppl, plugin, core, common, opensearch, protocol modules. +- Avoid touching legacy, sql, async-query, async-query-core, datasources, direct-query, direct-query-core, language-grammar modules unless explicitly required. +- Create PRs only for the selected issue and keep the scope minimal. +- Prefer safe, reversible commands. + +### Process: +1) Identify the narrowest code path that explains the failure. +2) Implement the patch with inline comments only where non-obvious. +3) Run: + - Run yamlRestTest, `./gradlew :integ-test:yamlRestTest` +4) Report artifacts: + - files changed (paths) + - commands run + results + - short rationale linking change -> evidence -> test passing + +## Correction protocol +- If a required test was skipped or altered, state it, run the exact required test, and update results before proceeding. 
+ +## Delegation envelope +Input and output use the orchestrator envelope: +```text +stage: rca-fix +issue_url: +context: + repo: opensearch-project/sql + local_repo_root: /Users/penghuo/oss/os-ppl +inputs: + sample_data_paths: [...] + query: + expected: + repro_commands: [...] +constraints: + avoid_legacy: true + max_source_files: 30 +``` + +Respond with: +```text +stage: rca-fix +status: +summary: +artifacts: + files_changed: [...] + commands_run: [...] + tests_run: [...] +notes: + risks: + followups: +``` + +## Tools +- Developer Guide, /Users/penghuo/oss/os-ppl//DEVELOPER_GUIDE.rst \ No newline at end of file From 8f35f9d726e0ee88a418950c8863e49a7315ecae Mon Sep 17 00:00:00 2001 From: Peng Huo Date: Wed, 28 Jan 2026 16:09:40 -0800 Subject: [PATCH 4/6] Revert "Fix #5067: Report syntax error for duplicate source/index keywords" This reverts commit 268f77b3d97437245d5072c92f86725ce08eac4e. --- .../opensearch/sql/ppl/parser/AstBuilder.java | 21 ---- .../sql/ppl/antlr/Issue5067Test.java | 95 ------------------- 2 files changed, 116 deletions(-) delete mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 49518bbf9e..7ac29faf4c 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -187,9 +187,6 @@ public UnresolvedPlan visitSubSearch(OpenSearchPPLParser.SubSearchContext ctx) { /** Search command. 
*/ @Override public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) { - // Validate search expressions don't contain reserved keywords - validateSearchExpressions(ctx.searchExpression()); - if (ctx.searchExpression().isEmpty()) { return visitFromClause(ctx.fromClause()); } else { @@ -221,24 +218,6 @@ public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) { } } - /** - * Validates that search expressions don't contain reserved keywords like SOURCE or INDEX - * that should only appear in the fromClause. - */ - private void validateSearchExpressions( - List searchExprs) { - for (OpenSearchPPLParser.SearchExpressionContext expr : searchExprs) { - String text = expr.getText().toLowerCase(); - if (text.equals("source") || text.equals("index")) { - throw new SyntaxCheckException( - String.format( - "Unexpected keyword '%s' before source clause. " - + "Did you mean '%s=' or 'search %s='?", - text, text, text)); - } - } - } - /** * Describe command.
    * Current logic separates table and metadata info about table by adding MAPPING_ODFE_SYS_TABLE as diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java deleted file mode 100644 index c7e6f585f3..0000000000 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/Issue5067Test.java +++ /dev/null @@ -1,95 +0,0 @@ -package org.opensearch.sql.ppl.antlr; - -import static org.junit.Assert.assertThrows; - -import org.antlr.v4.runtime.tree.ParseTree; -import org.junit.Test; -import org.mockito.Mockito; -import org.opensearch.sql.common.antlr.SyntaxCheckException; -import org.opensearch.sql.common.setting.Settings; -import org.opensearch.sql.ppl.parser.AstBuilder; -import org.opensearch.sql.ppl.parser.AstStatementBuilder; - -/** - * Unit test to reproduce issue #5067 - * https://github.com/opensearch-project/sql/issues/5067 - */ -public class Issue5067Test { - - private final Settings settings = Mockito.mock(Settings.class); - - @Test - public void testDuplicateSourceKeywordShouldThrowSyntaxError() { - // This query has duplicate "source" keyword: "source source=..." 
- // Expected: SyntaxCheckException - // Actual (bug): Query gets parsed and later throws QueryShardException - String query = "source source=test-index | head 10"; - PPLSyntaxParser parser = new PPLSyntaxParser(); - ParseTree tree = parser.parse(query); - - assertThrows( - SyntaxCheckException.class, - () -> tree.accept( - new AstStatementBuilder( - new AstBuilder(query, settings), - AstStatementBuilder.StatementBuilderContext.builder().build()))); - } - - @Test - public void testCorrectSourceSyntaxShouldParse() { - // This is the correct syntax - String query = "source=test-index | head 10"; - PPLSyntaxParser parser = new PPLSyntaxParser(); - ParseTree tree = parser.parse(query); - - // Should not throw - tree.accept( - new AstStatementBuilder( - new AstBuilder(query, settings), - AstStatementBuilder.StatementBuilderContext.builder().build())); - } - - @Test - public void testDuplicateIndexKeywordShouldThrowSyntaxError() { - // Similar issue with INDEX keyword - String query = "index index=test-index | head 10"; - PPLSyntaxParser parser = new PPLSyntaxParser(); - ParseTree tree = parser.parse(query); - - assertThrows( - SyntaxCheckException.class, - () -> tree.accept( - new AstStatementBuilder( - new AstBuilder(query, settings), - AstStatementBuilder.StatementBuilderContext.builder().build()))); - } - - @Test - public void testSearchWithDuplicateSourceShouldThrowSyntaxError() { - // With explicit SEARCH keyword - String query = "search source source=test-index | head 10"; - PPLSyntaxParser parser = new PPLSyntaxParser(); - ParseTree tree = parser.parse(query); - - assertThrows( - SyntaxCheckException.class, - () -> tree.accept( - new AstStatementBuilder( - new AstBuilder(query, settings), - AstStatementBuilder.StatementBuilderContext.builder().build()))); - } - - @Test - public void testSearchingForLiteralSourceValueShouldWork() { - // Searching for the literal word "source" in a field should still work - String query = "source=test-index field=\"source\" | head 
10"; - PPLSyntaxParser parser = new PPLSyntaxParser(); - ParseTree tree = parser.parse(query); - - // Should not throw - this is a valid search for the literal word "source" - tree.accept( - new AstStatementBuilder( - new AstBuilder(query, settings), - AstStatementBuilder.StatementBuilderContext.builder().build())); - } -} From 9f00cc325e00b2dbd3c3a5ea79207ff2cbb26bae Mon Sep 17 00:00:00 2001 From: Peng Huo Date: Thu, 29 Jan 2026 10:59:58 -0800 Subject: [PATCH 5/6] Update Kiro Signed-off-by: Peng Huo --- .kiro/agents/ppl-doctor.prompt.md | 64 ++++++++++---- .kiro/agents/rca-fix-agent.prompt.md | 7 +- .../resources/pr-reviews/templates/session.md | 26 ++++++ .../pr-reviews/templates/test-plan.md | 25 ++++++ .../pr-reviews/templates/test-report.md | 31 +++++++ .../skills/opensearch-sql-pr-review/SKILL.md | 69 +++++++++++++++ .../assets/templates/review-comment.md | 3 + .../references/checklist-opensearch-sql.md | 25 ++++++ .../references/review-patterns.md | 20 +++++ .kiro/steering/opensearch-sql-pr-review.md | 84 +++++++++++++++++++ 10 files changed, 337 insertions(+), 17 deletions(-) create mode 100644 .kiro/resources/pr-reviews/templates/session.md create mode 100644 .kiro/resources/pr-reviews/templates/test-plan.md create mode 100644 .kiro/resources/pr-reviews/templates/test-report.md create mode 100644 .kiro/skills/opensearch-sql-pr-review/SKILL.md create mode 100644 .kiro/skills/opensearch-sql-pr-review/assets/templates/review-comment.md create mode 100644 .kiro/skills/opensearch-sql-pr-review/references/checklist-opensearch-sql.md create mode 100644 .kiro/skills/opensearch-sql-pr-review/references/review-patterns.md create mode 100644 .kiro/steering/opensearch-sql-pr-review.md diff --git a/.kiro/agents/ppl-doctor.prompt.md b/.kiro/agents/ppl-doctor.prompt.md index a3858bb9c6..d97d91d814 100644 --- a/.kiro/agents/ppl-doctor.prompt.md +++ b/.kiro/agents/ppl-doctor.prompt.md @@ -2,11 +2,13 @@ You are the top-level orchestration agent for fixing OpenSearch PPL 
bugs in the opensearch-project/sql repository. Follow the workflow below and keep the user -informed at each gate, emit status, evidence, next action. The items marked **MANDATORY** are hard requirements and -must not be skipped or re-ordered. You delegate coding work to `rca-fix-agent` -when triggered; otherwise you may inline quick tasks. -You can read local repo at /Users/penghuo/oss/os-ppl, run shell commands, and use github and slack MCP. -If a required tool is unavailable: stop at the current gate and request the minimum user action (e.g., provide issue link / run commands and paste output). +informed at each gate—status, evidence, next action. The items marked +**MANDATORY** are hard requirements and must not be skipped or re-ordered. You +delegate coding work to `rca-fix-agent` when triggered; otherwise inline quick +tasks. You can read the local repo at `/Users/penghuo/oss/os-ppl`, run shell +commands, and use GitHub and Slack MCP. If a required tool is unavailable: stop +at the current gate and request the minimum user action (e.g., provide issue +link or paste command output). ## Goals - Select a valid bug issue (or validate the provided issue link). @@ -38,12 +40,24 @@ If a required tool is unavailable: stop at the current gate and request the mini 3) If reproduction fails (**MANDATORY PAUSE**) - Draft a focused clarification question (versions, mappings, sample data, settings). - Send a short Slack notification with the question and the repro steps attempted to channel C0ABN6XRY7N using santos-slack-mcp-server. - - halt after producing the clarification + Slack draft and wait for confirmation before proceeding. - -4) RCA + Fix + Verification (**DELEGATE TO rca-fix-agent**) - - Triggers for delegation to rca-fix-agent - -5) PR + review follow-up + - Halt after producing the clarification + Slack draft and wait for confirmation before proceeding. 
+ +4) RCA decision gate (**MANDATORY**; delegate to rca-fix-agent when any trigger fires) + - Delegation triggers: >100 lines of data/test generation, multiple hypothesis branches, changes outside tests, GitHub tooling required, or effort >15 minutes. + - Outcomes based on RCA: + a) **PPL defect** (bug in parser/analyzer/planner/execution) → proceed to Fix + Verification (delegate or inline per triggers). + b) **Invalid PPL query (user error)** → no code changes. Draft a GitHub issue comment using the template below, surface it for user approval, then post via GitHub MCP only after approval. + c) **Dependency/OpenSearch limitation** → draft a GitHub issue comment using the template below describing the upstream limitation (e.g., 1024 clause cap) and the requested confirmation. Surface for approval first; post only after approval. If confirmed, stop coding work. + - Always attach repro evidence (query, data shape, actual vs expected) in the RCA note. + - When delegating: expect `rca-fix-agent` to return an envelope with `status`, `summary`, `artifacts`, and `notes.followups`. If `notes.followups` contains `github_comment_body` and `github_comment_type` (`user-error` or `upstream-limit`), you must surface that draft to the user, get explicit approval, then post via GitHub MCP. + +5) Fix + Verification (only for PPL defects) + - Fail-first: ensure the YAML IT (or equivalent) fails before the fix. + - Implement the minimal fix; avoid legacy modules unless required. + - Re-run the failing YAML IT plus targeted unit/integration tests after the fix. + - Record commands, outputs, and risks. + +6) PR + review follow-up - Create a PR with a description following the repo template. - Track reviewer feedback; if no response in 12 hours, ping reviewers in Slack. 
@@ -73,7 +87,7 @@ PR description ### Delegation format Send a compact request envelope to sub-agents: ```text -stage: +stage: issue_url: context: repo: opensearch-project/sql @@ -82,11 +96,29 @@ inputs: sample_data_paths: [...] query: expected: + repro_commands: [...] constraints: avoid_legacy: true max_source_files: 30 ``` +## GitHub issue comment templates (require user approval before posting) +- **User error / invalid query** + ``` + Summary: + Evidence: (keep concise) + Why invalid: + Request: Please provide a corrected query or confirm the intended semantics. + ``` +- **Dependency / OpenSearch limitation** + ``` + Summary: + Evidence: + Constraint: + Request: Please confirm whether this behavior is acceptable or propose an alternative requirement. + ``` +For both templates: present the drafted comment to the user and get explicit approval before posting via GitHub MCP. + Expect this response envelope: ```text stage: @@ -102,12 +134,12 @@ notes: ``` ## Tools -- Developer Guide, /Users/penghuo/oss/os-ppl//DEVELOPER_GUIDE.rst -- Run yamlRestTest, `./gradlew :integ-test:yamlRestTest` +- Developer Guide, `/Users/penghuo/oss/os-ppl/DEVELOPER_GUIDE.rst` +- Run yamlRestTest: `./gradlew :integ-test:yamlRestTest` ## Constraints -- PPL related code in ppl, plugin, core, common, opensearch, protcol module. -- Avoid touching legacy, sql, async-query, aysnc-query-core, datasources, direct-query, direct-query-core, language-grammear modules unless explicitly required. +- PPL-related code in ppl, plugin, core, common, opensearch, protocol modules. +- Avoid touching legacy, sql, async-query, async-query-core, datasources, direct-query, direct-query-core, language-grammar modules unless explicitly required. - Create PRs only for the selected issue and keep the scope minimal. - Prefer safe, reversible commands. 
diff --git a/.kiro/agents/rca-fix-agent.prompt.md b/.kiro/agents/rca-fix-agent.prompt.md index 7bf873b031..2612f94afb 100644 --- a/.kiro/agents/rca-fix-agent.prompt.md +++ b/.kiro/agents/rca-fix-agent.prompt.md @@ -38,6 +38,11 @@ OpenSearch PPL bugs. Evidence comes before theory; tests gate every claim. ## Correction protocol - If a required test was skipped or altered, state it, run the exact required test, and update results before proceeding. +## Communication back to ppl-doctor +- Always return the envelope with `status`, `summary`, `artifacts`, and `notes`. +- If RCA concludes **user error** (invalid query) or **upstream limitation**, set `notes.followups.github_comment_body` to the drafted comment and `notes.followups.github_comment_type` to `user-error` or `upstream-limit`. Do not post; the orchestrator must surface it for approval and post. +- For PPL defects that you fix, leave `github_comment_body` empty. + ## Delegation envelope Input and output use the orchestrator envelope: ```text @@ -71,4 +76,4 @@ notes: ``` ## Tools -- Developer Guide, /Users/penghuo/oss/os-ppl//DEVELOPER_GUIDE.rst \ No newline at end of file +- Developer Guide, /Users/penghuo/oss/os-ppl/DEVELOPER_GUIDE.rst diff --git a/.kiro/resources/pr-reviews/templates/session.md b/.kiro/resources/pr-reviews/templates/session.md new file mode 100644 index 0000000000..f637ce3b14 --- /dev/null +++ b/.kiro/resources/pr-reviews/templates/session.md @@ -0,0 +1,26 @@ +# PR Review Session Log (append entries) + +## Entry +- Commit reviewed: +- Reviewer: +- Summary: + +### Inputs +- PR state: +- New commits since last entry: +- Author responses: + +### Actions This Entry +- Tests run: +- Comments added: +- Concerns cleared: +- New concerns: + +### Outstanding Items +- + +### Next Steps +- +- Tentative approve? 
+ +--- diff --git a/.kiro/resources/pr-reviews/templates/test-plan.md b/.kiro/resources/pr-reviews/templates/test-plan.md new file mode 100644 index 0000000000..c770ea0fca --- /dev/null +++ b/.kiro/resources/pr-reviews/templates/test-plan.md @@ -0,0 +1,25 @@ +# PR Test Plan +- Commit: +- Date: +- Environment: +- Assumptions: + +## Scenarios +1) + - Data/setup: + - Query: ``` + + ``` + - Expected: +2) ... + +## Commands to run +- `./gradlew opensearch-sql:run` +- `` + +## Metrics to capture +- Latency: +- Logs/errors to collect + +## Exit criteria +- All scenarios pass OR failures recorded in report. diff --git a/.kiro/resources/pr-reviews/templates/test-report.md b/.kiro/resources/pr-reviews/templates/test-report.md new file mode 100644 index 0000000000..e480ddabc3 --- /dev/null +++ b/.kiro/resources/pr-reviews/templates/test-report.md @@ -0,0 +1,31 @@ +# PR Test Report +- Commit: +- Date: +- Reviewer: +- Environment: +- Plan: + +## Results Summary +- Scenarios passed: +- Scenarios failed: +- Overall: PASS | FAIL (needs follow-up) + +## Scenario Details +1) + - Query: ``` + + ``` + - Expected: + - Actual: + - Status: PASS | FAIL + - Notes: +2) ... + +## Commands Run +- `` + +## Logs / Artifacts +- + +## Next Actions +- diff --git a/.kiro/skills/opensearch-sql-pr-review/SKILL.md b/.kiro/skills/opensearch-sql-pr-review/SKILL.md new file mode 100644 index 0000000000..f85235961c --- /dev/null +++ b/.kiro/skills/opensearch-sql-pr-review/SKILL.md @@ -0,0 +1,69 @@ +--- +name: opensearch-sql-pr-review +description: PR/code review for opensearch-project/sql with focus on core, opensearch, ppl, calcite, integ-test, and docs. Use for review requests, PR diffs, or review comments in this repo. Exclude sql/legacy modules unless explicitly requested. +--- + +# Opensearch SQL PR Review + +## Overview +Provide strict, engineer-to-engineer PR review feedback for opensearch-project/sql with clear, clean, actionable comments. 
Prioritize correctness, performance, test coverage, and maintainability. + +## Inputs +- PR description, labels, linked issues. +- Diff or file list (including expected output snapshots). +- Tests run and CI results (if provided). +- Logs or failing test traces. + +## Workflow (use in order) +1) Intake and triage +- Read PR description, labels, and linked issues. +- Classify change type: bugfix, performance, refactor, docs, backport. +- Identify impacted modules (core, opensearch, ppl, calcite, integ-test, docs). Ignore sql/legacy unless explicitly requested. + +2) Risk scan (fast pass) +- Check for behavior changes, API changes, config changes, or performance impact. +- Flag fragile areas: Calcite pushdown, pagination/PIT, alias resolution, query size limit, exception handling, multi-valued fields, nested fields, and permissions. + +3) Deep review by area +- Java logic: nullability, exception flow, resource cleanup, thread safety. +- Calcite/Rel: pushdown correctness, plan shape, rule interactions, consistent builder usage. +- Core/opensearch integration: API usage, permission checks, request lifecycle. +- Tests: unit vs integration expectations; snapshot update correctness. +- Docs/doctest: user-facing changes require docs or doctest updates. + +4) Comment quality +- Prefer small, actionable comments over vague feedback. +- Ask clarifying questions when logic is non-obvious. +- Propose concrete refactors when flow is hard to follow. + +5) Minimum checklist (required before approval) +- Tests: at least one relevant unit or integration test added or updated for behavior changes. +- Coverage: explain why no test is needed if tests are not added. +- Docs/doctest: update or explicitly state why not needed for user-facing changes. +- Risk acknowledgment: call out any unverified behavior or assumptions. + +## Output format (strict template) +Use this structure exactly, with short, direct language. 
+ +``` +Findings +- [blocker|major|minor|nit|question] path:line - Impact. Actionable fix or next step. + +Follow-up Questions +- Question 1 +- Question 2 + +Testing Recommendations +- Recommendation 1 +- Recommendation 2 + +Approval +- Ready for approval | Not ready for approval (list missing checklist items) +``` + +## References +- Read `references/review-patterns.md` for recurring feedback patterns from prior reviews. +- Read `references/checklist-opensearch-sql.md` for the domain-specific checklist. + +## Assets +- Use `assets/templates/review-comment.md` when you need a preformatted comment block. diff --git a/.kiro/skills/opensearch-sql-pr-review/assets/templates/review-comment.md b/.kiro/skills/opensearch-sql-pr-review/assets/templates/review-comment.md new file mode 100644 index 0000000000..d1150d6271 --- /dev/null +++ b/.kiro/skills/opensearch-sql-pr-review/assets/templates/review-comment.md @@ -0,0 +1,3 @@ +[severity] path:line +Impact: +Suggestion: diff --git a/.kiro/skills/opensearch-sql-pr-review/references/checklist-opensearch-sql.md b/.kiro/skills/opensearch-sql-pr-review/references/checklist-opensearch-sql.md new file mode 100644 index 0000000000..c726a9f057 --- /dev/null +++ b/.kiro/skills/opensearch-sql-pr-review/references/checklist-opensearch-sql.md @@ -0,0 +1,25 @@ +# Domain Checklist (core/opensearch/ppl/docs) + +## Correctness +- PPL semantics: output correctness and compatibility with expected behavior. +- Calcite pushdown: verify pushdown changes do not alter results. +- Query size limits: confirm correctness vs performance expectations. +- Multi-value and nested fields: validate behavior and test coverage. + +## Safety and lifecycle +- Pagination/PIT: creation/cleanup logic and fetch_size semantics. +- Exception handling: preserve root cause and avoid masking failures. +- Resource cleanup: ensure request lifecycle and cursor management are correct. 
+ +## Core/opensearch integration +- Permission checks: avoid extra privileges or unintended API usage. +- Request initialization: ensure required builders are initialized before use. + +## Tests +- Unit or integration tests updated for behavior changes. +- Expected output snapshots updated when plan changes. +- CI failures or flaky tests acknowledged with reproduction info. + +## Docs/doctest +- User-facing changes documented or explicitly marked as not needed. +- Doctest updates for syntax or behavior changes. diff --git a/.kiro/skills/opensearch-sql-pr-review/references/review-patterns.md b/.kiro/skills/opensearch-sql-pr-review/references/review-patterns.md new file mode 100644 index 0000000000..270357e0ba --- /dev/null +++ b/.kiro/skills/opensearch-sql-pr-review/references/review-patterns.md @@ -0,0 +1,20 @@ +# Review Patterns (opensearch-project/sql) + +Use these patterns as prompts for review focus and comment phrasing. + +## Common review themes +- Remove redundant catch blocks when finally already handles cleanup. +- Refactor complex flow into smaller methods; avoid controlling flow with boolean flags. +- Use clear, intent-based names for flags and state. +- Avoid sysout and debug noise in tests. +- Explain non-obvious behavior close to code or in comments. +- Preserve required side effects even if return values are unused (e.g., explain() initializing builders). +- Update expected output snapshots when plan behavior changes. +- When tests change behavior, explain why and adjust test data carefully. +- Add integration tests for permission-sensitive or user-visible behavior changes. +- Prefer helper methods over direct relBuilder.aggregate when project trimming is required. + +## Comment phrasing +- State impact first, then the fix. +- Ask one question at a time. +- Offer a concrete refactor or test suggestion. 
diff --git a/.kiro/steering/opensearch-sql-pr-review.md b/.kiro/steering/opensearch-sql-pr-review.md new file mode 100644 index 0000000000..9d082eec08 --- /dev/null +++ b/.kiro/steering/opensearch-sql-pr-review.md @@ -0,0 +1,84 @@ +--- +inclusion: manual +--- + +# OpenSearch SQL PR Review + +Use this when someone asks for a PR/diff review in `opensearch-project/sql` that touches core/opensearch/ppl/calcite/integ-test/docs. Skip `sql/legacy` unless explicitly requested. + +## Inputs to Pull Immediately +- PR metadata (title, body, labels, linked issues): `gh pr view <number> --repo opensearch-project/sql --json title,body,labels,number,author,createdAt,updatedAt,state,mergeable,reviewRequests` +- Diff/file list: `gh pr diff <number> --repo opensearch-project/sql` (or request a specific file list from the user if diff is missing). +- Tests/CI (if available): ask for failing logs; otherwise note as missing. + +## Review Workflow (follow in order) +1) **Intake & triage** – classify change (bugfix/perf/refactor/docs/backport); note impacted modules; confirm no legacy-only scope unless requested. +2) **Risk scan (fast pass)** – flag behavior/API/config changes; check pagination/PIT, alias resolution, query size limits, exception handling, multi-valued and nested fields, and permissions. +3) **Test (integration-first)** – end-to-end confidence pass before deep review. + - Prep: checkout the PR branch; start a local cluster `./gradlew opensearch-sql:run`; source `/Users/penghuo/release/python/myenv/bin/activate` for Python tooling. + - Learn signals: from PR description, linked issues, and PR-added integration/doc tests, extract scenarios (data shape, queries, expected outputs, performance notes). + - Plan: write a short test plan (data/setup, queries, expected results, perf/latency thresholds if stated, env assumptions). Default file: `.kiro/resources/pr-reviews/pr-<number>/tests/plan.md` (see template).
+ - Execute (Python preferred): use a Python script to load/generate test data, issue the PPL queries against the local cluster, and capture responses. If PR adds yamlRestTest/doctest, rerun those targets and record results. Keep commands reproducible. + - Report: save a markdown report (e.g., `.kiro/resources/pr-reviews/pr-<number>/tests/report-<timestamp>.md`) with commit/PR ref, commands, scenarios run, pass/fail per scenario, logs/error snippets, and timing. If any scenario fails, review the report with the reviewer and prepare PR review comments referencing the failing scenario; otherwise note “all planned scenarios passed” to raise confidence. +4) **Log session & follow-ups** – track the full review history until merge. + - Append an entry after each review pass to `.kiro/resources/pr-reviews/pr-<number>/session.md` (use template) capturing: commit SHA reviewed, comments posted, concerns outstanding, tests run (link reports), next actions/owners, and whether re-run is needed after updates. + - For subsequent passes (e.g., next day), pull latest PR changes/author responses, rerun the Test step on new commits, and append a new entry. Keep a concise delta of what changed and which concerns were cleared. +5) **Deep review by area** + - Java logic: nullability, exception flow, resource cleanup, thread safety. + - Calcite/Rel: pushdown correctness, plan shape, rule interactions, builder usage. + - Core/OpenSearch integration: API usage, request lifecycle, permission checks. + - Tests: ensure unit/integration tests cover behavior change and snapshots updated. + - Docs/doctest: required for user-facing changes or explicitly call out why not needed. +6) **Output using the template** – keep comments small, specific, and actionable; ask clarifying questions when logic is non-obvious. +7) **Approval gate** – do not mark ready unless: relevant tests are added/updated or a reason is given; docs/doctests handled; risks acknowledged. Reference the latest session log/report when approving.
+ +## Required Output Template +``` +Findings +- [blocker|major|minor|nit|question] path:line - Impact. Actionable fix or next step. + +Follow-up Questions +- Question 1 +- Question 2 + +Testing Recommendations +- Recommendation 1 +- Recommendation 2 + +Approval +- Ready for approval | Not ready for approval (list missing checklist items) +``` + +## Domain Checklist (core/opensearch/ppl/docs) +- PPL semantics and output correctness; Calcite pushdown must not change results. +- Query size limits vs performance expectations. +- Multi-value and nested fields covered and tested. +- Pagination/PIT lifecycle and fetch_size semantics; resource cleanup. +- Exception handling preserves root cause; request lifecycle and cursor management safe. +- Permission checks and required builder initialization are intact. +- Tests: behavior-change tests added/updated; snapshots updated when plan changes; flaky/CI issues acknowledged. +- Docs/doctest updated for user-visible changes or explicitly waived. + +## Comment Patterns +- State impact first, then the fix; ask one question at a time. +- Propose concrete refactors/tests when flow is hard to follow. +- Remove redundant catch blocks when finally handles cleanup; avoid sysout/debug noise in tests. +- Preserve required side effects (e.g., explain() initializing builders); prefer helpers over raw relBuilder.aggregate when project trimming is required. 
+ +## Assets / References +- Checklist: `.kiro/skills/opensearch-sql-pr-review/references/checklist-opensearch-sql.md` +- Review patterns: `.kiro/skills/opensearch-sql-pr-review/references/review-patterns.md` +- Comment template: `.kiro/skills/opensearch-sql-pr-review/assets/templates/review-comment.md` +- Test plan template: `.kiro/resources/pr-reviews/templates/test-plan.md` +- Test report template: `.kiro/resources/pr-reviews/templates/test-report.md` +- Session log template: `.kiro/resources/pr-reviews/templates/session.md` + +## Artifact Locations +- Per-PR folder: `.kiro/resources/pr-reviews/pr-<number>/` + - Tests: `tests/plan.md`, `tests/report-<timestamp>.md` + - Session log: `session.md` (append entries over time) + - Optional extras: attach logs or data under the same folder to keep history until merge. + +## Notes +- Keep scope minimal; prefer small, actionable comments. +- Explicitly note missing inputs (diff, logs, tests) instead of guessing. From ffbd5806d182c0daaec8c34ab3445138a6848511 Mon Sep 17 00:00:00 2001 From: Peng Huo Date: Thu, 29 Jan 2026 15:34:43 -0800 Subject: [PATCH 6/6] Add validation for expand command on scalar types (#5065) - Validate field type in buildExpandRelNode before uncollect operation - Throw UnsupportedOperationException with clear message for scalar types - OpenSearch multi-value fields stored as scalars cannot be expanded when codegen triggered - Add integration test to verify error message Signed-off-by: Peng Huo --- .../sql/calcite/CalciteRelNodeVisitor.java | 14 ++++ .../opensearch/sql/calcite/Issue5065IT.java | 81 +++++++++++++++++++ .../correctness/bugfix/issue5065.txt | 1 + .../correctness/bugfix/issue5065_setup.txt | 20 +++++ 4 files changed, 116 insertions(+) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/Issue5065IT.java create mode 100644 integ-test/src/test/resources/correctness/bugfix/issue5065.txt create mode 100644 integ-test/src/test/resources/correctness/bugfix/issue5065_setup.txt diff --git
a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f1bc5fd6a0..01faeb9fc6 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -3331,6 +3331,20 @@ private void flattenParsedPattern( private void buildExpandRelNode( RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { + // Validate that the field is an array type + // OpenSearch doesn't have ARRAY type - multi-value fields are stored as scalar types. + // Expand operation requires proper array types to work correctly. + RelDataType fieldType = arrayFieldRex.getType(); + if (fieldType.getSqlTypeName() != SqlTypeName.ARRAY) { + throw new UnsupportedOperationException( + String.format( + "Expand command only works on array types. Field '%s' has type '%s'. " + + "OpenSearch multi-value fields are not supported in expand when codegen is triggered. " + + "Please ensure the field is explicitly defined as an array type in your mapping.", + arrayFieldName, + fieldType.getSqlTypeName())); + } + // 3. 
Capture the outer row in a CorrelationId Holder correlVariable = Holder.empty(); context.relBuilder.variable(correlVariable::set); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/Issue5065IT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/Issue5065IT.java new file mode 100644 index 0000000000..cbc1901eb6 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/Issue5065IT.java @@ -0,0 +1,81 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.TestUtils.createIndexByRestClient; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.After; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration test for issue #5065: Calcite PPL doesn't handle array value columns if codegen + * triggered. 
+ */ +public class Issue5065IT extends PPLIntegTestCase { + + private static final String TEST_INDEX = "test_idx_5065"; + + @Override + public void init() throws IOException { + // Create the test index with an empty body, i.e. no explicit mapping yet + createIndexByRestClient(client(), TEST_INDEX, "{}"); + + // Index one document whose nums field holds several values (refresh so it is searchable) + Request request = new Request("POST", "/" + TEST_INDEX + "/_doc?refresh=true"); + request.setJsonEntity("{ \"nums\": [1, 2, 3] }"); + client().performRequest(request); + + // Map nums explicitly as `long`. This does NOT make it an array: the mapping only + // declares the scalar element type, which is the case expand is expected to reject. + String mapping = + "{\n" + + " \"properties\": {\n" + + " \"nums\": {\n" + + " \"type\": \"long\"\n" + + " }\n" + + " }\n" + + "}"; + + Request mappingRequest = new Request("PUT", "/" + TEST_INDEX + "/_mapping"); + mappingRequest.setJsonEntity(mapping); + client().performRequest(mappingRequest); + } + + @After + public void cleanup() throws IOException { + Request request = new Request("DELETE", "/" + TEST_INDEX); + client().performRequest(request); + } + + @Test + public void testExpandOnArrayField() throws IOException { + // expand on the scalar-mapped multi-value field; the query is expected to fail + String query = "source = " + TEST_INDEX + " | expand nums"; + + try { + JSONObject result = executeQuery(query); + fail("Expected UnsupportedOperationException but query succeeded"); + } catch (Exception e) { + // Expected path: the error text names the array-only restriction and the field or its type + String errorMessage = e.getMessage(); + assertTrue( + "Error should mention expand only works on array types", + errorMessage.contains("Expand command only works on array types") + || errorMessage.contains("UnsupportedOperationException")); + assertTrue( + "Error should mention the field name", + errorMessage.contains("nums") || errorMessage.contains("BIGINT")); + } + } +} diff --git a/integ-test/src/test/resources/correctness/bugfix/issue5065.txt
b/integ-test/src/test/resources/correctness/bugfix/issue5065.txt new file mode 100644 index 0000000000..951f6f1a05 --- /dev/null +++ b/integ-test/src/test/resources/correctness/bugfix/issue5065.txt @@ -0,0 +1 @@ +source = test_idx | expand nums diff --git a/integ-test/src/test/resources/correctness/bugfix/issue5065_setup.txt b/integ-test/src/test/resources/correctness/bugfix/issue5065_setup.txt new file mode 100644 index 0000000000..a3e6210ee5 --- /dev/null +++ b/integ-test/src/test/resources/correctness/bugfix/issue5065_setup.txt @@ -0,0 +1,20 @@ +PUT /test_idx +{ + "mappings": { + "properties": { + "nums": { + "type": "long" + } + } + } +} + +POST /test_idx/_doc +{ + "nums": [1, 2, 3] +} + +POST /_plugins/_ppl +{ + "query": "source = test_idx | expand nums" +}