diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4619.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4619.yml new file mode 100644 index 00000000000..bd38eb532ae --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4619.yml @@ -0,0 +1,80 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + log: + properties: + url: + properties: + message: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + time: + type: long + message_alias: + type: alias + path: log.url.message + time_alias: + type: alias + path: log.url.time + + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"dedup struct field name with dot": + - skip: + features: + - headers + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"log": {"url": {"message": "/e2e/h/zap", "time": 1} } }' + - '{"index": {}}' + - '{"log": {"url": {"message": "/e2e/h/zap", "time": 1} } }' + - '{"index": {}}' + - '{"log": {"url": {"message": "/e2e/h/zap", "time": 2} } }' + - '{"index": {}}' + - '{"log": {"url": {"message": "/e2e/h/zap", "time": 2} } }' + - '{"index": {}}' + - '{"log": {"url": {"message": "/e2e/h/aap", "time": 1} } }' + - '{"index": {}}' + - '{"log": {"url": {"message": "/e2e/h/aap", "time": 1} } }' + - '{"index": {}}' + - '{"log": {"url": {"time": 1} } }' + - '{"index": {}}' + - '{"log": {"url": {"time": 1} } }' + - '{"index": {}}' + - '{"log": {"url": {"time": 2} } }' + - '{"index": {}}' + - '{"log": {"url": {"time": 2} } }' + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test | dedup log.url.time' + - match: {"total": 2} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index ea5ae4d760c..5fd104cc794 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -34,6 +34,7 @@ import org.opensearch.sql.calcite.plan.Scannable; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; +import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; /** The physical relational operator representing a scan of an OpenSearchIndex type. */ public class CalciteEnumerableIndexScan extends AbstractCalciteIndexScan @@ -90,9 +91,14 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { * let's follow this convention to apply the optimization here and ensure `scan` method * returns the correct data format for single column rows. * See {@link OpenSearchIndexEnumerator} + * Besides, we replace all dots in fields to avoid the Calcite codegen bug. + * https://github.com/opensearch-project/sql/issues/4619 */ PhysType physType = - PhysTypeImpl.of(implementor.getTypeFactory(), getRowType(), pref.preferArray()); + PhysTypeImpl.of( + implementor.getTypeFactory(), + OpenSearchRelOptUtil.replaceDot(getCluster().getTypeFactory(), getRowType()), + pref.preferArray()); Expression scanOperator = implementor.stash(this, CalciteEnumerableIndexScan.class); return implementor.result(physType, Blocks.toBlock(Expressions.call(scanOperator, "scan"))); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java index 8b04392008b..bd1e626a332 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java @@ -8,12 +8,15 @@ import java.math.BigDecimal; import java.util.ArrayList; import java.util.BitSet; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import lombok.experimental.UtilityClass; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rel.type.RelDataTypeFieldImpl; import org.apache.calcite.rex.RexBiVisitorImpl; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; @@ -223,4 +226,70 @@ public Void visitInputRef(RexInputRef inputRef, Pair> args return null; } } + + /** + * Replace dot in field name with underscore, since Calcite has bug in codegen if a field name + * contains dot. + * + *

Fields replacement examples: + * + *

a_b, a.b -> a_b, a_b0 + * + *

a_b, a_b0, a.b -> a_b, a_b0, a_b1 + * + *

a_b, a_b1, a.b -> a_b, a_b1, a_b0 + * + *

a_b0, a.b0, a.b1 -> a_b0, a_b00, a_b1 + * + * @param rowType RowType + * @return RowType with field name replaced + */ + public RelDataType replaceDot(RelDataTypeFactory typeFactory, RelDataType rowType) { + final RelDataTypeFactory.Builder builder = typeFactory.builder(); + final List fieldList = rowType.getFieldList(); + List originalNames = new ArrayList<>(); + for (RelDataTypeField field : fieldList) { + originalNames.add(field.getName()); + } + List resolvedNames = OpenSearchRelOptUtil.resolveColumnNameConflicts(originalNames); + for (int i = 0; i < fieldList.size(); i++) { + RelDataTypeField field = fieldList.get(i); + builder.add( + new RelDataTypeFieldImpl(resolvedNames.get(i), field.getIndex(), field.getType())); + } + return builder.build(); + } + + public static List resolveColumnNameConflicts(List originalNames) { + List result = new ArrayList<>(originalNames); + Set usedNames = new HashSet<>(originalNames); + for (int i = 0; i < originalNames.size(); i++) { + String originalName = originalNames.get(i); + if (originalName.contains(".")) { + String baseName = originalName.replace('.', '_'); + String newName = generateUniqueName(baseName, usedNames); + result.set(i, newName); + usedNames.add(newName); + } + } + return result; + } + + private static String generateUniqueName(String baseName, Set usedNames) { + if (!usedNames.contains(baseName)) { + return baseName; + } + String candidate = baseName + "0"; + if (!usedNames.contains(candidate)) { + return candidate; + } + int suffix = 1; + while (true) { + candidate = baseName + suffix; + if (!usedNames.contains(candidate)) { + return candidate; + } + suffix++; + } + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java index 60cfba5ba7e..a9790d6485e 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java @@ -9,6 +9,8 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.Arrays; +import java.util.List; import java.util.Optional; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -243,4 +245,84 @@ private void assertExpectedInputInfo( assertEquals(index, result.get().getLeft().intValue()); assertEquals(flipped, result.get().getRight()); } + + @Test + public void testScenario1() { + List input = Arrays.asList("a_b", "a.b"); + List expected = Arrays.asList("a_b", "a_b0"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testScenario2() { + List input = Arrays.asList("a_b", "a_b0", "a.b"); + List expected = Arrays.asList("a_b", "a_b0", "a_b1"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testScenario3() { + List input = Arrays.asList("a_b", "a_b1", "a.b"); + List expected = Arrays.asList("a_b", "a_b1", "a_b0"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testScenario4() { + List input = Arrays.asList("a_b0", "a.b0", "a.b1"); + List expected = Arrays.asList("a_b0", "a_b00", "a_b1"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testMultipleDots() { + List input = Arrays.asList("a.b.c", "a_b_c", "a.b.c"); + List expected = Arrays.asList("a_b_c0", "a_b_c", "a_b_c1"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testComplexScenario() { + List input = Arrays.asList("x", "x", "x", "x"); + List expected = Arrays.asList("x", "x", "x", "x"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testNoConflict() { + List input = Arrays.asList("col1", "col2", "col3"); + List expected = Arrays.asList("col1", "col2", "col3"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testMixedConflict() { + List input = Arrays.asList("a.b", "a_b", "a.b", "a_b0"); + List expected = Arrays.asList("a_b1", "a_b", "a_b2", "a_b0"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testOriginalNamesPreserved() { + List input = Arrays.asList("endpoint.ip", "account.id", "timestamp"); + List expected = Arrays.asList("endpoint_ip", "account_id", "timestamp"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } + + @Test + public void testNoDots() { + List input = Arrays.asList("col1", "col2", "col3"); + List expected = Arrays.asList("col1", "col2", "col3"); + List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); + assertEquals(expected, result); + } }