Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
setup:
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
log:
properties:
url:
properties:
message:
type: text
fields:
keyword:
type: keyword
ignore_above: 256
time:
type: long
message_alias:
type: alias
path: log.url.message
time_alias:
type: alias
path: log.url.time

- do:
query.settings:
body:
transient:
plugins.calcite.enabled : true

---
teardown:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled : false

---
"dedup struct field name with dot":
- skip:
features:
- headers
- do:
bulk:
index: test
refresh: true
body:
- '{"index": {}}'
- '{"log": {"url": {"message": "/e2e/h/zap", "time": 1} } }'
- '{"index": {}}'
- '{"log": {"url": {"message": "/e2e/h/zap", "time": 1} } }'
- '{"index": {}}'
- '{"log": {"url": {"message": "/e2e/h/zap", "time": 2} } }'
- '{"index": {}}'
- '{"log": {"url": {"message": "/e2e/h/zap", "time": 2} } }'
- '{"index": {}}'
- '{"log": {"url": {"message": "/e2e/h/aap", "time": 1} } }'
- '{"index": {}}'
- '{"log": {"url": {"message": "/e2e/h/aap", "time": 1} } }'
- '{"index": {}}'
- '{"log": {"url": {"time": 1} } }'
- '{"index": {}}'
- '{"log": {"url": {"time": 1} } }'
- '{"index": {}}'
- '{"log": {"url": {"time": 2} } }'
- '{"index": {}}'
- '{"log": {"url": {"time": 2} } }'
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: 'source=test | dedup log.url.time'
- match: {"total": 2}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.opensearch.sql.calcite.plan.Scannable;
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
import org.opensearch.sql.opensearch.storage.OpenSearchIndex;
import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil;

/** The physical relational operator representing a scan of an OpenSearchIndex type. */
public class CalciteEnumerableIndexScan extends AbstractCalciteIndexScan
Expand Down Expand Up @@ -87,9 +88,14 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) {
* let's follow this convention to apply the optimization here and ensure `scan` method
* returns the correct data format for single column rows.
* See {@link OpenSearchIndexEnumerator}
* Besides, we replace all dots in fields to avoid the Calcite codegen bug.
* https://github.com/opensearch-project/sql/issues/4619
*/
PhysType physType =
PhysTypeImpl.of(implementor.getTypeFactory(), getRowType(), pref.preferArray());
PhysTypeImpl.of(
implementor.getTypeFactory(),
OpenSearchRelOptUtil.replaceDot(getCluster().getTypeFactory(), getRowType()),
pref.preferArray());

Expression scanOperator = implementor.stash(this, CalciteEnumerableIndexScan.class);
return implementor.result(physType, Blocks.toBlock(Expressions.call(scanOperator, "scan")));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import lombok.experimental.UtilityClass;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rel.type.RelDataTypeFieldImpl;
import org.apache.calcite.rex.RexBiVisitorImpl;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
Expand Down Expand Up @@ -205,4 +208,70 @@ public Void visitInputRef(RexInputRef inputRef, Pair<BitSet, List<Integer>> args
return null;
}
}

/**
* Replace dot in field name with underscore, since Calcite has bug in codegen if a field name
* contains dot.
*
* <p>Fields replacement examples:
*
* <p>a_b, a.b -> a_b, a_b0
*
* <p>a_b, a_b0, a.b -> a_b, a_b0, a_b1
*
* <p>a_b, a_b1, a.b -> a_b, a_b1, a_b0
*
* <p>a_b0, a.b0, a.b1 -> a_b0, a_b00, a_b1
*
* @param rowType RowType
* @return RowType with field name replaced
*/
public RelDataType replaceDot(RelDataTypeFactory typeFactory, RelDataType rowType) {
final RelDataTypeFactory.Builder builder = typeFactory.builder();
final List<RelDataTypeField> fieldList = rowType.getFieldList();
List<String> originalNames = new ArrayList<>();
for (RelDataTypeField field : fieldList) {
originalNames.add(field.getName());
}
List<String> resolvedNames = OpenSearchRelOptUtil.resolveColumnNameConflicts(originalNames);
for (int i = 0; i < fieldList.size(); i++) {
RelDataTypeField field = fieldList.get(i);
builder.add(
new RelDataTypeFieldImpl(resolvedNames.get(i), field.getIndex(), field.getType()));
}
return builder.build();
}

public static List<String> resolveColumnNameConflicts(List<String> originalNames) {
List<String> result = new ArrayList<>(originalNames);
Set<String> usedNames = new HashSet<>(originalNames);
for (int i = 0; i < originalNames.size(); i++) {
String originalName = originalNames.get(i);
if (originalName.contains(".")) {
String baseName = originalName.replace('.', '_');
String newName = generateUniqueName(baseName, usedNames);
result.set(i, newName);
usedNames.add(newName);
}
}
return result;
}

private static String generateUniqueName(String baseName, Set<String> usedNames) {
if (!usedNames.contains(baseName)) {
return baseName;
}
String candidate = baseName + "0";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit]seems can be merged with below code

if (!usedNames.contains(candidate)) {
return candidate;
}
int suffix = 1;
while (true) {
candidate = baseName + suffix;
if (!usedNames.contains(candidate)) {
return candidate;
}
suffix++;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
Expand Down Expand Up @@ -243,4 +245,85 @@ private void assertExpectedInputInfo(
assertEquals(index, result.get().getLeft().intValue());
assertEquals(flipped, result.get().getRight());
}

@Test
public void testScenario1() {
List<String> input = Arrays.asList("a_b", "a.b");
List<String> expected = Arrays.asList("a_b", "a_b0");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testScenario2() {
List<String> input = Arrays.asList("a_b", "a_b0", "a.b");
List<String> expected = Arrays.asList("a_b", "a_b0", "a_b1");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testScenario3() {
List<String> input = Arrays.asList("a_b", "a_b1", "a.b");
List<String> expected = Arrays.asList("a_b", "a_b1", "a_b0");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testScenario4() {
List<String> input = Arrays.asList("a_b0", "a.b0", "a.b1");
List<String> expected = Arrays.asList("a_b0", "a_b00", "a_b1");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testMultipleDots() {
List<String> input = Arrays.asList("a.b.c", "a_b_c", "a.b.c");
List<String> expected = Arrays.asList("a_b_c0", "a_b_c", "a_b_c1");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testComplexScenario() {
// 多级冲突处理
List<String> input = Arrays.asList("x", "x", "x", "x");
List<String> expected = Arrays.asList("x", "x", "x", "x");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testNoConflict() {
List<String> input = Arrays.asList("col1", "col2", "col3");
List<String> expected = Arrays.asList("col1", "col2", "col3");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testMixedConflict() {
List<String> input = Arrays.asList("a.b", "a_b", "a.b", "a_b0");
List<String> expected = Arrays.asList("a_b1", "a_b", "a_b2", "a_b0");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testOriginalNamesPreserved() {
List<String> input = Arrays.asList("endpoint.ip", "account.id", "timestamp");
List<String> expected = Arrays.asList("endpoint_ip", "account_id", "timestamp");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}

@Test
public void testNoDots() {
List<String> input = Arrays.asList("col1", "col2", "col3");
List<String> expected = Arrays.asList("col1", "col2", "col3");
List<String> result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input);
assertEquals(expected, result);
}
}
Loading