Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
8362fc2
Initial checkpoint - following calcite way and commented legacy way
Oct 10, 2025
384ba15
Removed the build.gradle dependency opensearch-common
Oct 22, 2025
7f382f9
Ready to submit this PR
Oct 22, 2025
44c8124
Ready to submit this PR
Oct 22, 2025
3cad64e
Ready to submit this PR
Oct 22, 2025
8e4a2c5
Add mvexpand.rst
Oct 22, 2025
474617d
Add Tests
Oct 22, 2025
d502b03
Add the mvexpand.rst to the index.rst
Oct 23, 2025
c62defe
Remove the unwanted code
Oct 27, 2025
a3799b2
Fix the failing test
Oct 27, 2025
d90be9f
Address the PR comments and fix the tests accordingly
Oct 30, 2025
da16288
Address the PR comments and fix the tests accordingly
Oct 30, 2025
1301e06
Address the PR comments and fix the tests accordingly
Oct 30, 2025
beb31de
Add comment lines for buildUnnestForLeft
Oct 30, 2025
627ef8f
Fix the mvexpand.rst
Oct 31, 2025
58facf8
Fix the failing test
Nov 3, 2025
63cdbf7
Fix the failing test
Nov 3, 2025
bdc3aa1
Fix the failing test
Nov 3, 2025
fc8e345
Fix the failing test
Nov 3, 2025
c830356
Address the PR comments
Nov 6, 2025
e9b6f27
Address the PR comments
Nov 7, 2025
fa9436e
Address the PR comments
Nov 13, 2025
ea091d2
Address the PR comments
Nov 13, 2025
4d9b24d
Address the PR comments
Nov 14, 2025
b9d3164
Address the issue as the happy path scenario was not working the way …
Nov 19, 2025
26a59a4
MvExpand as its own implementation - not aliasing
Nov 20, 2025
43c806e
Refactoring EXPAND and MVEXPAND
Nov 20, 2025
a07dff2
Refactor EXPAND and MVEXPAND and fix its unittest
Nov 20, 2025
7be7473
Convert mvexpand.rst examples to doctest
Nov 20, 2025
2c0ea2c
metadata.rst was missing the mvexpand_logs entry
Nov 21, 2025
8749289
Address the PR comments for IT and visitMvExpand
Nov 23, 2025
9508874
Merge branch 'main' into main
srikanthpadakanti Nov 27, 2025
08b56ee
Add test for mvdedup function with duplicates
srikanthpadakanti Nov 28, 2025
3ae2c73
Merge branch 'main' into main
srikanthpadakanti Nov 28, 2025
bed2084
Update core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVi…
srikanthpadakanti Nov 28, 2025
5e616ff
Update core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVi…
srikanthpadakanti Nov 28, 2025
709704c
Address the PR comments
Nov 28, 2025
c45fa05
Address the PR comments
Nov 28, 2025
4f3435e
change the limit behavior from global to perDocument
Nov 28, 2025
a0b2c8c
Merge branch 'opensearch-project:main' into main
srikanthpadakanti Dec 2, 2025
47779e1
Merge branch 'opensearch-project:main' into main
srikanthpadakanti Dec 3, 2025
bf6b924
Fix the CI issues
Dec 4, 2025
9aec421
Merge branch 'main' into main
srikanthpadakanti Dec 9, 2025
bf87312
Update the index.rst
Dec 9, 2025
c9e2767
spotlessapply
Dec 9, 2025
2591a6c
Merge branch 'mywork-backup'
Dec 11, 2025
125cf3b
spotlessapply
Dec 11, 2025
00c990f
address merge issue
Dec 11, 2025
44814ab
address merge issue
Dec 11, 2025
f9dd692
change rst to md
Dec 11, 2025
69d6a5a
Merge branch 'opensearch-project:main' into main
srikanthpadakanti Dec 11, 2025
2464675
change rst to md
Dec 11, 2025
0f86c52
delete unnecessary test
Dec 11, 2025
32d3867
Merge branch 'main' into main
srikanthpadakanti Dec 18, 2025
07509ae
remove index.rst and add mvexpand entry in index.md
Dec 18, 2025
34db739
spotless apply
Dec 18, 2025
602358e
merge issues.
Dec 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import org.opensearch.sql.ast.tree.Lookup;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Multisearch;
import org.opensearch.sql.ast.tree.MvExpand;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
Expand Down Expand Up @@ -713,6 +714,11 @@ public LogicalPlan visitExpand(Expand expand, AnalysisContext context) {
throw getOnlyForCalciteException("Expand");
}

/**
 * MvExpand is not handled by this analyzer: the method unconditionally throws the exception
 * produced by {@code getOnlyForCalciteException("MvExpand")}, mirroring {@code visitExpand}.
 */
@Override
public LogicalPlan visitMvExpand(MvExpand node, AnalysisContext context) {
throw getOnlyForCalciteException("MvExpand");
}

/** Build {@link LogicalTrendline} for Trendline command. */
@Override
public LogicalPlan visitTrendline(Trendline node, AnalysisContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,51 +44,7 @@
import org.opensearch.sql.ast.statement.Explain;
import org.opensearch.sql.ast.statement.Query;
import org.opensearch.sql.ast.statement.Statement;
import org.opensearch.sql.ast.tree.AD;
import org.opensearch.sql.ast.tree.AddColTotals;
import org.opensearch.sql.ast.tree.AddTotals;
import org.opensearch.sql.ast.tree.Aggregation;
import org.opensearch.sql.ast.tree.Append;
import org.opensearch.sql.ast.tree.AppendCol;
import org.opensearch.sql.ast.tree.AppendPipe;
import org.opensearch.sql.ast.tree.Bin;
import org.opensearch.sql.ast.tree.Chart;
import org.opensearch.sql.ast.tree.CloseCursor;
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.Expand;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Flatten;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Join;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.Lookup;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Multisearch;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Regex;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Replace;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.SPath;
import org.opensearch.sql.ast.tree.Search;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.StreamWindow;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.ast.tree.Window;
import org.opensearch.sql.ast.tree.*;

/** AST nodes visitor Defines the traverse path. */
public abstract class AbstractNodeVisitor<T, C> {
Expand Down Expand Up @@ -461,4 +417,8 @@ public T visitAddTotals(AddTotals node, C context) {
/** Visits an {@link AddColTotals} node; this base implementation delegates to {@code visitChildren}. */
public T visitAddColTotals(AddColTotals node, C context) {
return visitChildren(node, context);
}

/** Visits an {@link MvExpand} node; this base implementation delegates to {@code visitChildren}. */
public T visitMvExpand(MvExpand node, C context) {
return visitChildren(node, context);
}
}
5 changes: 5 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.MinSpanBin;
import org.opensearch.sql.ast.tree.MvExpand;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
Expand Down Expand Up @@ -136,6 +137,10 @@ public Expand expand(UnresolvedPlan input, Field field, String alias) {
return new Expand(field, alias).attach(input);
}

/**
 * Builds an {@link MvExpand} node for {@code field} and attaches {@code input} as its child.
 *
 * @param input plan that feeds the mvexpand node
 * @param field field to expand
 * @param limit limit forwarded to the {@link MvExpand} constructor
 * @return the new {@link MvExpand} node with {@code input} attached
 */
public static UnresolvedPlan mvexpand(UnresolvedPlan input, Field field, Integer limit) {
  MvExpand node = new MvExpand(field, limit);
  return node.attach(input);
}

public static UnresolvedPlan projectWithArg(
UnresolvedPlan input, List<Argument> argList, UnresolvedExpression... projectList) {
return new Project(Arrays.asList(projectList), argList).attach(input);
Expand Down
46 changes: 46 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import com.google.common.collect.ImmutableList;
import java.util.List;
import javax.annotation.Nullable;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Field;

/** AST node representing the {@code mvexpand <field> [limit=N]} PPL command. */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[limit N] -> [limit=N]
It might be better to refer to it as the mvexpand PPL command to avoid inconsistency.

@ToString
@EqualsAndHashCode(callSuper = false)
public class MvExpand extends UnresolvedPlan {

  /** Input plan; remains {@code null} until {@link #attach} is called. */
  private UnresolvedPlan child;

  /** Field that this node expands. */
  @Getter private final Field field;

  /** Optional limit; {@code null} when none was given. */
  @Getter @Nullable private final Integer limit;

  /**
   * Creates an mvexpand node with no child attached.
   *
   * @param field field to expand
   * @param limit optional limit, may be {@code null}
   */
  public MvExpand(Field field, @Nullable Integer limit) {
    this.limit = limit;
    this.field = field;
  }

  /** Stores {@code child} as this node's input and returns this node for chaining. */
  @Override
  public MvExpand attach(UnresolvedPlan child) {
    this.child = child;
    return this;
  }

  /** Returns the attached child as a single-element list, or an empty list if none is attached. */
  @Override
  public List<UnresolvedPlan> getChild() {
    if (child != null) {
      return ImmutableList.of(child);
    }
    return ImmutableList.of();
  }

  /** Dispatches to {@link AbstractNodeVisitor#visitMvExpand}. */
  @Override
  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
    return nodeVisitor.visitMvExpand(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
import org.opensearch.sql.ast.expression.ParseMethod;
import org.opensearch.sql.ast.expression.PatternMethod;
import org.opensearch.sql.ast.expression.PatternMode;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.Span;
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
Expand Down Expand Up @@ -122,6 +123,7 @@
import org.opensearch.sql.ast.tree.Lookup.OutputStrategy;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Multisearch;
import org.opensearch.sql.ast.tree.MvExpand;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
Expand Down Expand Up @@ -845,7 +847,11 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) {
.toList();
context.relBuilder.aggregate(context.relBuilder.groupKey(groupByList), aggCall);
buildExpandRelNode(
context.relBuilder.field(node.getAlias()), node.getAlias(), node.getAlias(), context);
context.relBuilder.field(node.getAlias()),
node.getAlias(),
node.getAlias(),
null,
context);
flattenParsedPattern(
node.getAlias(),
context.relBuilder.field(node.getAlias()),
Expand Down Expand Up @@ -3093,11 +3099,102 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) {
RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context);
String alias = expand.getAlias();

buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, context);
buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, null, context);

return context.relBuilder.peek();
}

/**
* MVExpand command visitor.
*
* <p>For Calcite remote planning, mvexpand shares the same expansion mechanics as {@link Expand}:
* it unnests the target multivalue field and joins back to the original relation. The additional
* mvexpand semantics (such as an optional per-document limit) are surfaced via the MVExpand AST
* node but reuse the same underlying RelBuilder pipeline as expand at this layer.
*
* <p>Behaviour by case, after the child plan has been visited:
*
* <ul>
*   <li>Field not present in the input row type: the input fields are projected together with an
*       extra NULL column named after the field, and a {@code false} filter is applied so that no
*       rows are returned. No exception is raised, and the limit is not validated on this path.
*   <li>Field present but not of SQL type ARRAY: a {@link SemanticCheckException} is thrown.
*   <li>Field is an ARRAY: the limit (if any) is validated and the expansion is delegated to
*       {@code buildExpandRelNode}, using the field name as both the source name and the alias.
* </ul>
*
* @param mvExpand MVExpand command to be visited
* @param context CalcitePlanContext containing the RelBuilder and other context
* @return RelNode representing records with the expanded multi-value field
* @throws SemanticCheckException if the field is not an ARRAY, or if a non-null limit is zero or
*     negative
*/
@Override
public RelNode visitMvExpand(MvExpand mvExpand, CalcitePlanContext context) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it have lots of difference with visitExpand? Should it be mostly same other than limit parameter?

// 1. Visit children
visitChildren(mvExpand, context);

RelBuilder relBuilder = context.relBuilder;
RelDataType rowType = relBuilder.peek().getRowType();

Field field = mvExpand.getField();

// Dotted name built from the field's qualified-name parts (see extractFieldName)
String fieldName = extractFieldName(field);

// 2. Lookup field directly in the current row type.
// NOTE(review): the two boolean arguments are presumably Calcite's caseSensitive and elideRecord
// flags, which would make this lookup case-insensitive — confirm that this is intended.
RelDataTypeField matched = rowType.getField(fieldName, false, false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should rely on rexVisitor.analyze to resolve field node.


// 2A. Missing field → zero-row relation whose schema is the input fields plus the missing
// field as a NULL column (no exception is raised)
if (matched == null) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we raise error when the field is not found?

// Schema must include the missing field, even if no rows returned.
List<RelDataTypeField> fields = rowType.getFieldList();
List<RexNode> projects = new ArrayList<>();
List<String> names = new ArrayList<>();

// Keep existing fields
for (RelDataTypeField f : fields) {
projects.add(relBuilder.field(f.getIndex()));
names.add(f.getName());
}

// Add NULL for missing field
projects.add(relBuilder.literal(null));
names.add(fieldName);

relBuilder.project(projects, names);

// Now return 0 rows: a constant-false filter on top of the projection
relBuilder.filter(relBuilder.literal(false));

return relBuilder.peek();
}

// 2B. Non-array → SemanticCheckException (thrown immediately, before the limit is checked)
RelDataType type = matched.getType();
SqlTypeName sqlType = type.getSqlTypeName();

if (sqlType != SqlTypeName.ARRAY) {
throw new SemanticCheckException(
String.format(
"Cannot expand field '%s': expected ARRAY type but found %s",
fieldName, sqlType.getName()));
}

// 2C. Valid array → expand (with optional per-document limit)
int index = matched.getIndex();
// Reference the array column by its position in the current row type
RexInputRef fieldRef = context.rexBuilder.makeInputRef(type, index);

// A null limit means "no limit"; zero and negative values are rejected
Integer limit = mvExpand.getLimit();
if (limit != null && limit <= 0) {
throw new SemanticCheckException(
String.format("mvexpand limit must be positive, but got %d", limit));
}
// The expanded values replace the original column: source name and alias are the same
buildExpandRelNode(fieldRef, fieldName, fieldName, limit, context);

return relBuilder.peek();
}

/**
 * Derives the plain field name carried by a {@link Field} node.
 *
 * <p>If the wrapped expression is a {@link QualifiedName} with at least one part, the parts are
 * joined with {@code "."}. Otherwise the expression's {@code toString()} is used with every
 * backtick removed.
 *
 * @param f field node whose name is wanted
 * @return the dotted name, or the backtick-free string form of the wrapped expression
 */
private String extractFieldName(Field f) {
  UnresolvedExpression expr = f.getField();

  // Anything that is not a qualified name goes straight to the string fallback.
  if (!(expr instanceof QualifiedName)) {
    return expr.toString().replace("`", "");
  }

  List<String> segments = ((QualifiedName) expr).getParts();
  return segments.isEmpty() ? expr.toString().replace("`", "") : String.join(".", segments);
}

@Override
public RelNode visitValues(Values values, CalcitePlanContext context) {
if (values.getValues() == null || values.getValues().isEmpty()) {
Expand Down Expand Up @@ -3342,7 +3439,11 @@ private void flattenParsedPattern(
}

private void buildExpandRelNode(
RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) {
RexInputRef arrayFieldRex,
String arrayFieldName,
String alias,
@Nullable Integer perDocLimit,
CalcitePlanContext context) {
// 3. Capture the outer row in a CorrelationId
Holder<RexCorrelVariable> correlVariable = Holder.empty();
context.relBuilder.variable(correlVariable::set);
Expand All @@ -3357,14 +3458,17 @@ private void buildExpandRelNode(
RelNode leftNode = context.relBuilder.build();

// 5. Build join right node and expand the array field using uncollect
RelNode rightNode =
context
.relBuilder
// fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter
.push(LogicalValues.createOneRow(context.relBuilder.getCluster()))
.project(List.of(correlArrayFieldAccess), List.of(arrayFieldName))
.uncollect(List.of(), false)
.build();
context
.relBuilder
// fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter
.push(LogicalValues.createOneRow(context.relBuilder.getCluster()))
.project(List.of(correlArrayFieldAccess), List.of(arrayFieldName))
.uncollect(List.of(), false);

if (perDocLimit != null) {
context.relBuilder.limit(0, perDocLimit);
}
RelNode rightNode = context.relBuilder.build();

// 6. Perform a nested-loop join (correlate) between the original table and the expanded
// array field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,14 @@ void populate() {
OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER)
.or(OperandTypes.family(SqlTypeFamily.MAP, SqlTypeFamily.ANY)),
false));
// Allow using INTERNAL_ITEM when the element type is unknown/undefined at planning time.
// Some datasets (or Calcite's type inference) may give the element an UNDEFINED type.
// Accept an "ignore" first-argument family so INTERNAL_ITEM(elem, 'key') can still be planned
// and resolved at runtime (fallback semantics are handled on the execution side). Used by MVEXPAND.
registerOperator(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if missed some context. What is this used for? Please point me to comment related if any.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code defines a fallback type-checker for INTERNAL_ITEM to handle cases where the element type is undefined at planning time. It ensures the operator accepts composite types (IGNORE, CHARACTER) for resilient type validation during query execution.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some datasets (or Calcite's type inference) may give the element an UNDEFINED type

Could you clarify a little more which query/test dataset you have this problem? If any previous comment related, please let me know. Thanks!

Copy link
Author

@srikanthpadakanti srikanthpadakanti Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without that code, tests fail because the INTERNAL_ITEM operator requires specific argument types ({ARRAY, INTEGER} or {STRUCT, ANY}) for type validation during query execution. When Calcite assigns an UNDEFINED type at planning time, the absence of fallback handling in registerOperator causes validation to fail, triggering ExpressionEvaluationException.

With that code, the fallback type-checker accepts UNDEFINED types as valid arguments (IGNORE, CHARACTER), allowing queries to pass type validation, execute correctly, and produce expected results in the tests.

Example: I commented out that code and ran my IT; the failure is shown below.

testMvexpandDuplicate

org.opensearch.client.ResponseException: method [POST], host [http://127.0.0.1:65246], URI [/_plugins/_ppl], status line [HTTP/1.1 400 Bad Request]
{
"error": {
"reason": "Invalid Query",
"details": "INTERNAL_ITEM function expects {[ARRAY,INTEGER]|[STRUCT,ANY]}, but got [UNDEFINED,STRING]",
"type": "ExpressionEvaluationException"
},
"status": 400
}

INTERNAL_ITEM,
SqlStdOperatorTable.ITEM,
PPLTypeChecker.family(SqlTypeFamily.IGNORE, SqlTypeFamily.CHARACTER));
registerOperator(
XOR,
SqlStdOperatorTable.NOT_EQUALS,
Expand Down
3 changes: 2 additions & 1 deletion docs/category.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"user/ppl/cmd/regex.md",
"user/ppl/cmd/rename.md",
"user/ppl/cmd/multisearch.md",
"user/ppl/cmd/mvexpand.md",
"user/ppl/cmd/replace.md",
"user/ppl/cmd/rex.md",
"user/ppl/cmd/search.md",
Expand Down Expand Up @@ -79,4 +80,4 @@
"bash_settings": [
"user/ppl/admin/settings.md"
]
}
}
4 changes: 3 additions & 1 deletion docs/user/dql/metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Example 1: Show All Indices Information
SQL query::

os> SHOW TABLES LIKE '%'
fetched rows / total rows = 23/23
fetched rows / total rows = 24/24
+----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION |
|----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------|
Expand All @@ -48,6 +48,7 @@ SQL query::
| docTestCluster | null | events_many_hosts | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | events_null | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | mvexpand_logs | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | occupation | BASE TABLE | null | null | null | null | null | null |
Expand Down Expand Up @@ -120,3 +121,4 @@ SQL query::
| docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | |
| docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | |
+----------------+-------------+------------+-------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+

Loading
Loading