diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index fd11dd1f6b4..3efdc3fd514 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -86,6 +86,7 @@ import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; +import org.opensearch.sql.ast.tree.Search; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.ast.tree.SubqueryAlias; @@ -278,6 +279,18 @@ public LogicalPlan visitLimit(Limit node, AnalysisContext context) { return new LogicalLimit(child, node.getLimit(), node.getOffset()); } + @Override + public LogicalPlan visitSearch(Search node, AnalysisContext context) { + LogicalPlan child = node.getChild().get(0).accept(this, context); + Function queryStringFunc = + AstDSL.function( + "query_string", + AstDSL.unresolvedArg("query", AstDSL.stringLiteral(node.getQueryString()))); + + Expression analyzed = expressionAnalyzer.analyze(queryStringFunc, context); + return new LogicalFilter(child, analyzed); + } + @Override public LogicalPlan visitFilter(Filter node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a2d54d3ec05..84f7bdbd4a6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -75,6 +75,7 @@ import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; +import org.opensearch.sql.ast.tree.Search; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.SubqueryAlias; import 
org.opensearch.sql.ast.tree.TableFunction; @@ -131,6 +132,10 @@ public T visitTableFunction(TableFunction node, C context) { return visitChildren(node, context); } + public T visitSearch(Search node, C context) { + return visitChildren(node, context); + } + public T visitFilter(Filter node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index a4c5b2f5e01..c8600bb9809 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -71,6 +71,7 @@ import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.SPath; +import org.opensearch.sql.ast.tree.Search; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.ast.tree.SpanBin; @@ -109,6 +110,10 @@ public UnresolvedPlan describe(String tableName) { return new DescribeRelation(qualifiedName(tableName)); } + public static UnresolvedPlan search(UnresolvedPlan input, String queryString) { + return new Search(input, queryString); + } + public UnresolvedPlan subqueryAlias(UnresolvedPlan child, String alias) { return new SubqueryAlias(child, alias); } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java new file mode 100644 index 00000000000..fe7be0db13e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java @@ -0,0 +1,34 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.Arrays; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +/** Search expression for AND 
operator. */ +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchAnd extends SearchExpression { + + private final SearchExpression left; + private final SearchExpression right; + + @Override + public String toQueryString() { + return left.toQueryString() + " AND " + right.toQueryString(); + } + + @Override + public List getChild() { + return Arrays.asList(left, right); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java new file mode 100644 index 00000000000..be099059546 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java @@ -0,0 +1,70 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.Arrays; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.utils.QueryStringUtils; + +/** Search expression for field comparisons. 
*/ +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchComparison extends SearchExpression { + + public enum Operator { + EQUALS("="), + NOT_EQUALS("!="), + LESS_THAN("<"), + LESS_OR_EQUAL("<="), + GREATER_THAN(">"), + GREATER_OR_EQUAL(">="); + + private final String symbol; + + Operator(String symbol) { + this.symbol = symbol; + } + + public String getSymbol() { + return symbol; + } + } + + private final Field field; + private final Operator operator; + private final SearchLiteral value; + + @Override + public String toQueryString() { + String fieldName = QueryStringUtils.escapeFieldName(field.getField().toString()); + String valueStr = value.toQueryString(); + switch (operator) { + case NOT_EQUALS: + return "( _exists_:" + fieldName + " AND NOT " + fieldName + ":" + valueStr + " )"; + case GREATER_THAN: + return fieldName + ":>" + valueStr; + case GREATER_OR_EQUAL: + return fieldName + ":>=" + valueStr; + case LESS_THAN: + return fieldName + ":<" + valueStr; + case LESS_OR_EQUAL: + return fieldName + ":<=" + valueStr; + default: + return fieldName + ":" + valueStr; + } + } + + @Override + public List getChild() { + return Arrays.asList(field, value); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java new file mode 100644 index 00000000000..23bf806d230 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java @@ -0,0 +1,24 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** Base class for search expressions that get converted to query_string syntax. */ +public abstract class SearchExpression extends UnresolvedExpression { + + /** + * Convert this search expression to query_string syntax. 
+ * + * @return the query string representation + */ + public abstract String toQueryString(); + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitChildren(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java new file mode 100644 index 00000000000..41b85f408ca --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java @@ -0,0 +1,33 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.Collections; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +/** Search expression for grouped expressions (parentheses). */ +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchGroup extends SearchExpression { + + private final SearchExpression expression; + + @Override + public String toQueryString() { + return "(" + expression.toQueryString() + ")"; + } + + @Override + public List getChild() { + return Collections.singletonList(expression); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java new file mode 100644 index 00000000000..bdbdb712a9c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import 
org.opensearch.sql.utils.QueryStringUtils; + +/** Search expression for IN operator. */ +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchIn extends SearchExpression { + + private final Field field; + private final List values; + + @Override + public String toQueryString() { + String fieldName = QueryStringUtils.escapeFieldName(field.getField().toString()); + String valueList = + values.stream().map(SearchLiteral::toQueryString).collect(Collectors.joining(" OR ")); + + return fieldName + ":( " + valueList + " )"; + } + + @Override + public List getChild() { + List children = new ArrayList<>(); + children.add(field); + // SearchLiteral extends SearchExpression which extends UnresolvedExpression + children.addAll(values); + return children; + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java new file mode 100644 index 00000000000..f27dad34bab --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java @@ -0,0 +1,62 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.Collections; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.utils.QueryStringUtils; + +/** Search expression for standalone literals. 
*/ +@Getter +@AllArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchLiteral extends SearchExpression { + + private final UnresolvedExpression literal; + private final boolean isPhrase; + + @Override + public String toQueryString() { + if (literal instanceof Literal) { + Literal lit = (Literal) literal; + Object val = lit.getValue(); + + // Numbers don't need escaping + if (val instanceof Number) { + return val.toString(); + } + + // Strings + if (val instanceof String) { + String str = (String) val; + + // Phrase search - preserve quotes + if (isPhrase) { + // Escape special chars inside the phrase + str = QueryStringUtils.escapeLuceneSpecialCharacters(str); + return "\"" + str + "\""; + } + + // Regular string - escape special characters + return QueryStringUtils.escapeLuceneSpecialCharacters(str); + } + } + + // Default: escape the text representation + String text = literal.toString(); + return QueryStringUtils.escapeLuceneSpecialCharacters(text); + } + + @Override + public List getChild() { + return Collections.singletonList(literal); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java new file mode 100644 index 00000000000..3e5083b7678 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java @@ -0,0 +1,33 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.Collections; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +/** Search expression for NOT operator. 
*/ +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchNot extends SearchExpression { + + private final SearchExpression expression; + + @Override + public String toQueryString() { + return "NOT(" + expression.toQueryString() + ")"; + } + + @Override + public List getChild() { + return Collections.singletonList(expression); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java new file mode 100644 index 00000000000..b5aa72807bd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java @@ -0,0 +1,34 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import java.util.Arrays; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +/** Search expression for OR operator. 
*/ +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class SearchOr extends SearchExpression { + + private final SearchExpression left; + private final SearchExpression right; + + @Override + public String toQueryString() { + return left.toQueryString() + " OR " + right.toQueryString(); + } + + @Override + public List getChild() { + return Arrays.asList(left, right); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Search.java b/core/src/main/java/org/opensearch/sql/ast/tree/Search.java new file mode 100644 index 00000000000..ebc74192987 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Search.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** + * Logical plan node for Search operation. Represents search expressions that get converted to + * query_string function. 
+ */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class Search extends UnresolvedPlan { + + private final UnresolvedPlan child; + private final String queryString; + + @Override + public List getChild() { + return ImmutableList.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitSearch(this, context); + } + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + return new Search(child, queryString); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 211d6bf1a27..faa8a8fa6f9 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -114,6 +114,7 @@ import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; +import org.opensearch.sql.ast.tree.Search; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.ast.tree.SubqueryAlias; @@ -170,6 +171,21 @@ private RelBuilder scan(RelOptTable tableSchema, CalcitePlanContext context) { return context.relBuilder; } + @Override + public RelNode visitSearch(Search node, CalcitePlanContext context) { + // Visit the Relation child to get the scan + node.getChild().get(0).accept(this, context); + // Create query_string function + Function queryStringFunc = + AstDSL.function( + "query_string", + AstDSL.unresolvedArg("query", AstDSL.stringLiteral(node.getQueryString()))); + RexNode queryStringRex = rexVisitor.analyze(queryStringFunc, context); + + context.relBuilder.filter(queryStringRex); + return context.relBuilder.peek(); + } + @Override public RelNode visitFilter(Filter node, CalcitePlanContext context) { visitChildren(node, context); diff --git 
/** Utility methods for producing text in query_string syntax. */
public class QueryStringUtils {

  /**
   * Characters that {@link #escapeLuceneSpecialCharacters(String)} prefixes with a backslash.
   *
   * <p>The wildcards {@code *} and {@code ?} are deliberately absent so wildcard patterns keep
   * working. The backslash itself is also absent, so a backslash in the input is passed through
   * unchanged.
   */
  public static final String LUCENE_SPECIAL_CHARS = "+-&|!(){}[]^\"~:/";

  /**
   * Escapes a field name for query_string syntax.
   *
   * <p>Only spaces are escaped (each becomes backslash + space). Every other character,
   * including the dots used in nested field paths, is returned unchanged.
   *
   * @param fieldName the field name to escape; must not be {@code null}
   * @return the field name with every space escaped
   * @throws NullPointerException if {@code fieldName} is {@code null}
   */
  public static String escapeFieldName(String fieldName) {
    java.util.Objects.requireNonNull(fieldName, "fieldName");
    return fieldName.replace(" ", "\\ ");
  }

  /**
   * Escapes Lucene/query_string special characters by prefixing each with a backslash.
   *
   * <p>Escaped characters: {@code + - & | ! ( ) { } [ ] ^ " ~ : /}. The two-character operators
   * {@code &&} and {@code ||} need no special handling: escaping each of their characters
   * individually already yields {@code \&\&} and {@code \|\|}. The wildcards {@code *} and
   * {@code ?} are not escaped.
   *
   * @param text the text to escape; must not be {@code null}
   * @return the escaped text with wildcards preserved
   * @throws NullPointerException if {@code text} is {@code null}
   */
  public static String escapeLuceneSpecialCharacters(String text) {
    java.util.Objects.requireNonNull(text, "text");

    // A little headroom avoids a resize for the common case of a few escaped characters.
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (LUCENE_SPECIAL_CHARS.indexOf(c) >= 0) {
        escaped.append('\\');
      }
      escaped.append(c);
    }
    return escaped.toString();
  }
}
org.opensearch.sql.ast.tree.Relation; +import org.opensearch.sql.ast.tree.Search; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.function.BuiltinFunctionRepository; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.logical.LogicalRelation; +import org.opensearch.sql.storage.Table; + +public class AnalyzerSearchTest { + + private Analyzer analyzer; + private ExpressionAnalyzer expressionAnalyzer; + private DataSourceService dataSourceService; + private BuiltinFunctionRepository repository; + private AnalysisContext context; + private Table mockTable; + + @BeforeEach + public void setUp() { + expressionAnalyzer = mock(ExpressionAnalyzer.class); + dataSourceService = mock(DataSourceService.class); + repository = mock(BuiltinFunctionRepository.class); + context = new AnalysisContext(); + mockTable = mock(Table.class); + + analyzer = new Analyzer(expressionAnalyzer, dataSourceService, repository); + + // Setup table with field types + when(mockTable.getFieldTypes()) + .thenReturn( + ImmutableMap.of( + "field1", ExprCoreType.STRING, + "field2", ExprCoreType.STRING)); + when(mockTable.getReservedFieldTypes()).thenReturn(ImmutableMap.of()); + } + + @Test + public void testVisitSearchWithSimpleQuery() { + // Arrange + String queryString = "field1:value1"; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + Search searchNode = new Search(relation, queryString); + + LogicalRelation logicalRelation = new LogicalRelation("test_index", mockTable); + FunctionExpression queryStringExpr = mock(FunctionExpression.class); + + // Mock DataSource and StorageEngine + org.opensearch.sql.datasource.model.DataSource mockDataSource = + 
mock(org.opensearch.sql.datasource.model.DataSource.class); + org.opensearch.sql.storage.StorageEngine mockStorageEngine = + mock(org.opensearch.sql.storage.StorageEngine.class); + when(dataSourceService.getDataSource(any())).thenReturn(mockDataSource); + when(mockDataSource.getStorageEngine()).thenReturn(mockStorageEngine); + when(mockStorageEngine.getTable(any(), any())).thenReturn(mockTable); + + // Mock the child plan processing + when(expressionAnalyzer.analyze(any(Function.class), eq(context))).thenReturn(queryStringExpr); + + // Act + LogicalPlan result = analyzer.visitSearch(searchNode, context); + + // Assert + assertNotNull(result); + assertTrue(result instanceof LogicalFilter); + + // Verify query_string function was created + ArgumentCaptor functionCaptor = ArgumentCaptor.forClass(Function.class); + verify(expressionAnalyzer).analyze(functionCaptor.capture(), eq(context)); + + Function capturedFunction = functionCaptor.getValue(); + assertEquals("query_string", capturedFunction.getFuncName()); + assertEquals(1, capturedFunction.getFuncArgs().size()); + } + + @Test + public void testVisitSearchWithComplexQuery() { + // Arrange + String queryString = "(field1:value1 OR field2:value2) AND NOT field3:value3"; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + Search searchNode = new Search(relation, queryString); + + FunctionExpression queryStringExpr = mock(FunctionExpression.class); + when(queryStringExpr.type()).thenReturn(ExprCoreType.BOOLEAN); + + // Mock DataSource and StorageEngine + org.opensearch.sql.datasource.model.DataSource mockDataSource = + mock(org.opensearch.sql.datasource.model.DataSource.class); + org.opensearch.sql.storage.StorageEngine mockStorageEngine = + mock(org.opensearch.sql.storage.StorageEngine.class); + when(dataSourceService.getDataSource(any())).thenReturn(mockDataSource); + when(mockDataSource.getStorageEngine()).thenReturn(mockStorageEngine); + when(mockStorageEngine.getTable(any(), 
any())).thenReturn(mockTable); + + when(expressionAnalyzer.analyze(any(Function.class), eq(context))).thenReturn(queryStringExpr); + + // Act + LogicalPlan result = analyzer.visitSearch(searchNode, context); + + // Assert + assertNotNull(result); + assertTrue(result instanceof LogicalFilter); + LogicalFilter filter = (LogicalFilter) result; + assertEquals(queryStringExpr, filter.getCondition()); + } + + @Test + public void testVisitSearchPreservesChildPlan() { + // Arrange + String queryString = "test:query"; + UnresolvedPlan mockChild = mock(UnresolvedPlan.class); + Search searchNode = new Search(mockChild, queryString); + + LogicalPlan mockLogicalPlan = mock(LogicalPlan.class); + FunctionExpression queryStringExpr = mock(FunctionExpression.class); + + when(mockChild.accept(analyzer, context)).thenReturn(mockLogicalPlan); + when(expressionAnalyzer.analyze(any(Function.class), eq(context))).thenReturn(queryStringExpr); + + // Act + LogicalPlan result = analyzer.visitSearch(searchNode, context); + + // Assert + assertNotNull(result); + assertTrue(result instanceof LogicalFilter); + LogicalFilter filter = (LogicalFilter) result; + assertEquals(mockLogicalPlan, filter.getChild().get(0)); + assertEquals(queryStringExpr, filter.getCondition()); + } + + @Test + public void testVisitSearchWithEmptyQuery() { + // Arrange + String queryString = ""; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + Search searchNode = new Search(relation, queryString); + + FunctionExpression queryStringExpr = mock(FunctionExpression.class); + + // Mock DataSource and StorageEngine + org.opensearch.sql.datasource.model.DataSource mockDataSource = + mock(org.opensearch.sql.datasource.model.DataSource.class); + org.opensearch.sql.storage.StorageEngine mockStorageEngine = + mock(org.opensearch.sql.storage.StorageEngine.class); + when(dataSourceService.getDataSource(any())).thenReturn(mockDataSource); + when(mockDataSource.getStorageEngine()).thenReturn(mockStorageEngine); 
+ when(mockStorageEngine.getTable(any(), any())).thenReturn(mockTable); + + when(expressionAnalyzer.analyze(any(Function.class), eq(context))).thenReturn(queryStringExpr); + + // Act + LogicalPlan result = analyzer.visitSearch(searchNode, context); + + // Assert + assertNotNull(result); + assertTrue(result instanceof LogicalFilter); + + // Verify empty query string was passed + ArgumentCaptor functionCaptor = ArgumentCaptor.forClass(Function.class); + verify(expressionAnalyzer).analyze(functionCaptor.capture(), eq(context)); + + Function capturedFunction = functionCaptor.getValue(); + assertEquals("query_string", capturedFunction.getFuncName()); + } + + @Test + public void testVisitSearchCreatesCorrectQueryStringFunction() { + // Arrange + String queryString = "field:\"exact phrase\" AND field2:wildcard*"; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + Search searchNode = new Search(relation, queryString); + + FunctionExpression queryStringExpr = mock(FunctionExpression.class); + + // Mock DataSource and StorageEngine + org.opensearch.sql.datasource.model.DataSource mockDataSource = + mock(org.opensearch.sql.datasource.model.DataSource.class); + org.opensearch.sql.storage.StorageEngine mockStorageEngine = + mock(org.opensearch.sql.storage.StorageEngine.class); + when(dataSourceService.getDataSource(any())).thenReturn(mockDataSource); + when(mockDataSource.getStorageEngine()).thenReturn(mockStorageEngine); + when(mockStorageEngine.getTable(any(), any())).thenReturn(mockTable); + + when(expressionAnalyzer.analyze(any(Function.class), eq(context))).thenReturn(queryStringExpr); + + // Act + analyzer.visitSearch(searchNode, context); + + // Assert - verify the function structure + ArgumentCaptor functionCaptor = ArgumentCaptor.forClass(Function.class); + verify(expressionAnalyzer).analyze(functionCaptor.capture(), eq(context)); + + Function capturedFunction = functionCaptor.getValue(); + assertEquals("query_string", 
capturedFunction.getFuncName()); + assertEquals(1, capturedFunction.getFuncArgs().size()); + + // The function should have an unresolvedArg with "query" name and the query string as value + var funcArg = capturedFunction.getFuncArgs().get(0); + assertNotNull(funcArg); + } +} diff --git a/core/src/test/java/org/opensearch/sql/ast/tree/SearchTest.java b/core/src/test/java/org/opensearch/sql/ast/tree/SearchTest.java new file mode 100644 index 00000000000..c6808d502ff --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/ast/tree/SearchTest.java @@ -0,0 +1,144 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +public class SearchTest { + + private UnresolvedPlan mockChild; + private Search search; + private String testQueryString; + + @BeforeEach + public void setUp() { + mockChild = mock(UnresolvedPlan.class); + testQueryString = "field1:value1 AND field2:value2"; + search = new Search(mockChild, testQueryString); + } + + @Test + public void testConstructor() { + assertNotNull(search); + assertEquals(mockChild, search.getChild().get(0)); + assertEquals(testQueryString, search.getQueryString()); + } + + @Test + public void testGetChild() { + List children = search.getChild(); + assertNotNull(children); + assertEquals(1, children.size()); + assertEquals(mockChild, children.get(0)); + assertTrue(children instanceof 
ImmutableList); + } + + @Test + public void testGetQueryString() { + assertEquals(testQueryString, search.getQueryString()); + } + + @Test + public void testAccept() { + AbstractNodeVisitor mockVisitor = mock(AbstractNodeVisitor.class); + Object mockContext = new Object(); + when(mockVisitor.visitSearch(search, mockContext)).thenReturn("visited"); + + String result = search.accept(mockVisitor, mockContext); + + assertEquals("visited", result); + verify(mockVisitor).visitSearch(search, mockContext); + } + + @Test + public void testEquals() { + UnresolvedPlan sameChild = mockChild; + String sameQueryString = testQueryString; + Search sameSearch = new Search(sameChild, sameQueryString); + + assertEquals(search, sameSearch); + assertEquals(search, search); + } + + @Test + public void testNotEqualsWithDifferentChild() { + UnresolvedPlan differentChild = mock(UnresolvedPlan.class); + Search differentSearch = new Search(differentChild, testQueryString); + + assertNotEquals(search, differentSearch); + } + + @Test + public void testNotEqualsWithDifferentQueryString() { + Search differentSearch = new Search(mockChild, "different:query"); + + assertNotEquals(search, differentSearch); + } + + @Test + public void testNotEqualsWithNull() { + assertNotEquals(search, null); + } + + @Test + public void testNotEqualsWithDifferentClass() { + assertNotEquals(search, "not a Search object"); + } + + @Test + public void testHashCode() { + Search sameSearch = new Search(mockChild, testQueryString); + assertEquals(search.hashCode(), sameSearch.hashCode()); + } + + @Test + public void testHashCodeWithDifferentValues() { + UnresolvedPlan differentChild = mock(UnresolvedPlan.class); + Search differentSearch = new Search(differentChild, "different:query"); + assertNotEquals(search.hashCode(), differentSearch.hashCode()); + } + + @Test + public void testToString() { + String toStringResult = search.toString(); + assertNotNull(toStringResult); + assertTrue(toStringResult.contains("Search")); + 
assertTrue(toStringResult.contains("queryString=" + testQueryString)); + } + + @Test + public void testWithEmptyQueryString() { + Search emptySearch = new Search(mockChild, ""); + assertEquals("", emptySearch.getQueryString()); + assertEquals(mockChild, emptySearch.getChild().get(0)); + } + + @Test + public void testWithComplexQueryString() { + String complexQuery = "(field1:value1 OR field2:value2) AND NOT field3:value3"; + Search complexSearch = new Search(mockChild, complexQuery); + assertEquals(complexQuery, complexSearch.getQueryString()); + } + + @Test + public void testWithSpecialCharactersInQueryString() { + String specialCharsQuery = "field:\"value with spaces\" AND field2:value*"; + Search specialSearch = new Search(mockChild, specialCharsQuery); + assertEquals(specialCharsQuery, specialSearch.getQueryString()); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorSearchSimpleTest.java b/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorSearchSimpleTest.java new file mode 100644 index 00000000000..b2de7c050fe --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorSearchSimpleTest.java @@ -0,0 +1,102 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.tree.Relation; +import org.opensearch.sql.ast.tree.Search; + +/** + * Simple tests for CalciteRelNodeVisitor.visitSearch method. Tests basic functionality without + * complex mocking. 
+ */ +public class CalciteRelNodeVisitorSearchSimpleTest { + + private CalciteRelNodeVisitor visitor; + + @BeforeEach + public void setUp() { + visitor = new CalciteRelNodeVisitor(); + } + + @Test + public void testVisitSearchRequiresContext() { + // Arrange + String queryString = "field:value"; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + Search searchNode = new Search(relation, queryString); + + // Act & Assert - should throw NPE without proper context + assertThrows( + NullPointerException.class, + () -> { + visitor.visitSearch(searchNode, null); + }); + } + + @Test + public void testSearchNodeStructure() { + // Arrange + String queryString = "field1:value1 AND field2:value2"; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + + // Act + Search searchNode = new Search(relation, queryString); + + // Assert + assertNotNull(searchNode); + assertEquals(queryString, searchNode.getQueryString()); + assertNotNull(searchNode.getChild()); + assertEquals(1, searchNode.getChild().size()); + assertEquals(relation, searchNode.getChild().get(0)); + } + + @Test + public void testSearchWithEmptyQuery() { + // Arrange + String queryString = ""; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + + // Act + Search searchNode = new Search(relation, queryString); + + // Assert + assertEquals("", searchNode.getQueryString()); + assertEquals(relation, searchNode.getChild().get(0)); + } + + @Test + public void testSearchWithComplexQuery() { + // Arrange + String queryString = "(field1:value1 OR field2:value2) AND NOT field3:value3"; + Relation relation = new Relation(AstDSL.qualifiedName("test_index")); + + // Act + Search searchNode = new Search(relation, queryString); + + // Assert + assertEquals(queryString, searchNode.getQueryString()); + } + + @Test + public void testSearchWithSpecialCharacters() { + // Arrange + String queryString = "field:\"value with spaces\" AND field2:wildcard*"; + Relation relation = 
new Relation(AstDSL.qualifiedName("test_index")); + + // Act + Search searchNode = new Search(relation, queryString); + + // Assert + assertEquals(queryString, searchNode.getQueryString()); + } +} diff --git a/docs/category.json b/docs/category.json index 38c16255d03..4eb954062a8 100644 --- a/docs/category.json +++ b/docs/category.json @@ -62,6 +62,7 @@ "user/ppl/cmd/rename.rst", "user/ppl/cmd/rex.rst", "user/ppl/cmd/stats.rst", - "user/ppl/cmd/timechart.rst" + "user/ppl/cmd/timechart.rst", + "user/ppl/cmd/search.rst" ] } diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 9e55ab9885b..b34d5f2c3bc 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 16/16 + fetched rows / total rows = 17/17 +----------------+-------------+------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -49,6 +49,7 @@ SQL query:: | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | occupation | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | otellogs | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | state_country | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | weblogs | BASE TABLE | null | null | null | null | null | 
null | diff --git a/docs/user/ppl/cmd/search.rst b/docs/user/ppl/cmd/search.rst index 9e55daddeb1..45883fe7cc0 100644 --- a/docs/user/ppl/cmd/search.rst +++ b/docs/user/ppl/cmd/search.rst @@ -16,49 +16,457 @@ Description Syntax ============ -search source=[<remote-cluster>:]<index> [boolean-expression] +search source=[<remote-cluster>:]<index> [search-expression] -* search: search keywords, which could be ignore. +* search: search keyword, which could be ignored. * index: mandatory. search command must specify which index to query from. The index name can be prefixed by "<cluster name>:" for cross-cluster search. -* bool-expression: optional. any expression which could be evaluated to boolean value. +* search-expression: optional. Search expression that gets converted to OpenSearch `query_string <https://docs.opensearch.org/latest/query-dsl/full-text/query-string/>`_ function which uses `Lucene Query Syntax <https://lucene.apache.org/core/2_9_4/queryparsersyntax.html>`_. +Search Expression +================= + +The search expression syntax supports: + +* **Full text search**: ``error`` or ``"error message"`` - Searches the default field configured by the ``index.query.default_field`` setting (defaults to ``*`` which searches all fields) +* **Field-value comparisons**: ``field=value``, ``field!=value``, ``field>value``, ``field>=value``, ``field<value``, ``field<=value`` + +Field Types and Search Behavior +=============================== + +**Numeric Fields**: Range queries, exact matching, IN operator + +* ``search age>=18 AND balance<50000 source=accounts`` + +* Limitations: No wildcard or text search support + +**Date Fields**: Range queries, exact matching, IN operator + +* ``search timestamp>="2024-01-01" source=logs`` + +* Limitations: Must use index mapping date format, no wildcards + +**Boolean Fields**: true/false values only, exact matching, IN operator + +* ``search active=true source=users`` + +* Limitations: No wildcards or range queries + +**IP Fields**: Exact matching, CIDR notation + +* ``search client_ip="192.168.1.0/24" source=logs`` + +* Limitations: No wildcards for partial IP matching + +**Field Type Performance Tips**: + + * Each field type has specific search capabilities and limitations.
Using the wrong field type during ingestion impacts performance and accuracy + * For wildcard searches on non-keyword fields: Add a keyword field copy for better performance. Example: If you need wildcards on a text field, create ``message.keyword`` alongside ``message`` Cross-Cluster Search ==================== Cross-cluster search lets any node in a cluster execute search requests against other clusters. Refer to `Cross-Cluster Search `_ for configuration. +Examples +======== -Example 1: Fetch all the data -============================= +Example 1: Text Search +----------------------------------- -The example show fetch all the document from accounts index. +**Basic Text Search** (unquoted single term):: -PPL query:: + os> search ERROR source=otellogs | sort @timestamp | fields severityText, body | head 1; + fetched rows / total rows = 1/1 + +--------------+---------------------------------------------------------+ + | severityText | body | + |--------------+---------------------------------------------------------| + | ERROR | Payment failed: Insufficient funds for user@example.com | + +--------------+---------------------------------------------------------+ - os> source=accounts; - fetched rows / total rows = 4/4 +**Phrase Search** (requires quotes for multi-word exact match):: + + os> search "Payment failed" source=otellogs | fields body; + fetched rows / total rows = 1/1 + +---------------------------------------------------------+ + | body | + |---------------------------------------------------------| + | Payment failed: Insufficient funds for user@example.com | + +---------------------------------------------------------+ + +**Implicit AND with Multiple Terms** (unquoted literals are combined with AND):: + + os> search user email source=otellogs | sort @timestamp | fields body | head 1; + fetched rows / total rows = 1/1 + +--------------------------------------------------------------------------------------------------------------------+ + | body | + 
|--------------------------------------------------------------------------------------------------------------------| + | Executing SQL: SELECT * FROM users WHERE email LIKE '%@gmail.com' AND status != 'deleted' ORDER BY created_at DESC | + +--------------------------------------------------------------------------------------------------------------------+ + +Note: ``search user email`` is equivalent to ``search user AND email``. Multiple unquoted terms are automatically combined with AND. + +**Enclose in double quotes for terms which contain special characters**:: + + os> search "john.doe+newsletter@company.com" source=otellogs | fields body; + fetched rows / total rows = 1/1 + +--------------------------------------------------------------------------------------------------------------------+ + | body | + |--------------------------------------------------------------------------------------------------------------------| + | Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome! 
Your order #12345 is confirmed' | + +--------------------------------------------------------------------------------------------------------------------+ + +**Mixed Phrase and Boolean**:: + + os> search "User authentication" OR OAuth2 source=otellogs | sort @timestamp | fields body | head 1; + fetched rows / total rows = 1/1 + +----------------------------------------------------------------------------------------------------------+ + | body | + |----------------------------------------------------------------------------------------------------------| + | [2024-01-15 10:30:09] production.INFO: User authentication successful for admin@company.org using OAuth2 | + +----------------------------------------------------------------------------------------------------------+ + +Example 2: Boolean Logic and Operator Precedence +------------------------------------------------- + +**Boolean Operators**:: + + os> search severityText="ERROR" OR severityText="FATAL" source=otellogs | sort @timestamp | fields severityText | head 3; + fetched rows / total rows = 3/3 + +--------------+ + | severityText | + |--------------| + | ERROR | + | FATAL | + | ERROR | + +--------------+ + + os> search severityText="INFO" AND `resource.attributes.service.name`="cart-service" source=otellogs | fields body | head 1; + fetched rows / total rows = 1/1 + +----------------------------------------------------------------------------------+ + | body | + |----------------------------------------------------------------------------------| + | User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart | + +----------------------------------------------------------------------------------+ + +**Operator Precedence** (highest to lowest): Parentheses → NOT → OR → AND:: + + os> search severityText="ERROR" OR severityText="WARN" AND severityNumber>15 source=otellogs | sort @timestamp | fields severityText, severityNumber | head 2; + fetched rows / total rows = 2/2 + 
+--------------+----------------+ + | severityText | severityNumber | + |--------------+----------------| + | ERROR | 17 | + | ERROR | 17 | + +--------------+----------------+ + +The above evaluates as ``(severityText="ERROR" OR severityText="WARN") AND severityNumber>15`` + +Example 3: NOT vs != Semantics +------------------------------- + +**!= operator** (field must exist and not equal the value):: + + os> search employer!="Quility" source=accounts; + fetched rows / total rows = 2/2 + +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + +**NOT operator** (excludes matching conditions, includes null fields):: + + os> search NOT employer="Quility" source=accounts; + fetched rows / total rows = 3/3 +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | 
Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ -Example 2: Fetch data with condition -==================================== +**Key difference**: ``!=`` excludes null values, ``NOT`` includes them. + +Dale Adams (account 18) has ``employer=null``. He appears in ``NOT employer="Quility"`` but not in ``employer!="Quility"``. -The example show fetch all the document from accounts index with . +Example 4: Wildcards +-------------------- -PPL query:: +**Wildcard Patterns**:: - os> source=accounts account_number=1 or gender="F"; + os> search severityText=ERR* source=otellogs | sort @timestamp | fields severityText | head 3; + fetched rows / total rows = 3/3 + +--------------+ + | severityText | + |--------------| + | ERROR | + | ERROR | + | ERROR2 | + +--------------+ + + os> search body=user* source=otellogs | sort @timestamp | fields body | head 2; fetched rows / total rows = 2/2 - +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+----------------------+----------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+----------------------+----------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+----------------------+----------+ + 
+----------------------------------------------------------------------------------+ + | body | + |----------------------------------------------------------------------------------| + | User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart | + | Payment failed: Insufficient funds for user@example.com | + +----------------------------------------------------------------------------------+ + +**Wildcard Rules**: + +* ``*`` - Matches zero or more characters +* ``?`` - Matches exactly one character + +**Single character wildcard (?)**:: + + os> search severityText="INFO?" source=otellogs | sort @timestamp | fields severityText | head 3; + fetched rows / total rows = 3/3 + +--------------+ + | severityText | + |--------------| + | INFO2 | + | INFO3 | + | INFO4 | + +--------------+ + + +Example 5: Range Queries +------------------------- + +Use comparison operators (>, <, >=, <=) to filter numeric and date fields within specific ranges. Range queries are particularly useful for filtering by age, price, timestamps, or any numeric metrics. + +:: + + os> search severityNumber>15 AND severityNumber<=20 source=otellogs | sort @timestamp | fields severityNumber | head 3; + fetched rows / total rows = 3/3 + +----------------+ + | severityNumber | + |----------------| + | 17 | + | 17 | + | 18 | + +----------------+ + + os> search `attributes.payment.amount`>=1000.0 AND `attributes.payment.amount`<=2000.0 source=otellogs | fields body; + fetched rows / total rows = 1/1 + +---------------------------------------------------------+ + | body | + |---------------------------------------------------------| + | Payment failed: Insufficient funds for user@example.com | + +---------------------------------------------------------+ + +Example 6: Field Search with Wildcards +--------------------------------------- + +When searching in text or keyword fields, wildcards enable partial matching. 
This is particularly useful for finding records where you only know part of the value. Note that wildcards work best with keyword fields, while text fields may produce unexpected results due to tokenization. + +**Partial Search in Keyword Fields**:: + + os> search employer=Py* source=accounts | fields firstname, employer; + fetched rows / total rows = 1/1 + +-----------+----------+ + | firstname | employer | + |-----------+----------| + | Amber | Pyrami | + +-----------+----------+ + +**Combining Wildcards with Field Comparisons**:: + + os> search firstname=A* AND age>30 source=accounts | fields firstname, age, city; + fetched rows / total rows = 1/1 + +-----------+-----+--------+ + | firstname | age | city | + |-----------+-----+--------| + | Amber | 32 | Brogan | + +-----------+-----+--------+ + +**Important Notes on Wildcard Usage**: + +* **Keyword fields**: Best for wildcard searches - exact value matching with pattern support +* **Text fields**: Wildcards apply to individual tokens after analysis, not the entire field value +* **Performance**: Leading wildcards (e.g., ``*@example.com``) are slower than trailing wildcards +* **Case sensitivity**: Keyword field wildcards are case-sensitive unless normalized during indexing + +Example 7: IN Operator and Field Comparisons +--------------------------------------------- + +The IN operator efficiently checks if a field matches any value from a list. This is cleaner and more performant than chaining multiple OR conditions for the same field. 
+ +**IN Operator**:: + + os> search severityText IN ("ERROR", "WARN", "FATAL") source=otellogs | sort @timestamp | fields severityText | head 3; + fetched rows / total rows = 3/3 + +--------------+ + | severityText | + |--------------| + | ERROR | + | WARN | + | FATAL | + +--------------+ + +**Field Comparison Examples**:: + + os> search severityNumber=17 source=otellogs | sort @timestamp | fields body | head 1; + fetched rows / total rows = 1/1 + +---------------------------------------------------------+ + | body | + |---------------------------------------------------------| + | Payment failed: Insufficient funds for user@example.com | + +---------------------------------------------------------+ + + os> search `attributes.user.email`="user@example.com" source=otellogs | fields body; + fetched rows / total rows = 1/1 + +---------------------------------------------------------+ + | body | + |---------------------------------------------------------| + | Payment failed: Insufficient funds for user@example.com | + +---------------------------------------------------------+ + +Example 8: Complex Expressions +------------------------------- + +Combine multiple conditions using boolean operators and parentheses to create sophisticated search queries. 
+:: + + os> search (severityText="ERROR" OR severityText="WARN") AND severityNumber>10 source=otellogs | sort @timestamp | fields severityText | head 3; + fetched rows / total rows = 3/3 + +--------------+ + | severityText | + |--------------| + | ERROR | + | WARN | + | ERROR | + +--------------+ + + os> search `attributes.user.email`="user@example.com" OR (`attributes.error.code`="INSUFFICIENT_FUNDS" AND severityNumber>15) source=otellogs | fields body; + fetched rows / total rows = 1/1 + +---------------------------------------------------------+ + | body | + |---------------------------------------------------------| + | Payment failed: Insufficient funds for user@example.com | + +---------------------------------------------------------+ + +Example 9: Special Characters and Escaping +------------------------------------------- + +Understand when and how to escape special characters in your search queries. There are two categories of characters that need escaping: + +**Characters that must be escaped**: + +* **Backslashes (\\)**: Always escape as ``\\`` to search for literal backslash +* **Quotes (")**: Escape as ``\"`` when inside quoted strings + +**Wildcard characters (escape only to search literally)**: + +* **Asterisk (*)**: Use as-is for wildcard, escape as ``\\*`` to search for literal asterisk +* **Question mark (?)**: Use as-is for wildcard, escape as ``\\?`` to search for literal question mark + +.. list-table:: Wildcard vs Literal Search + :widths: 25 35 40 + :header-rows: 1 + + * - Intent + - PPL Syntax + - Result + * - Wildcard search + - ``field=user*`` + - Matches "user", "user123", "userABC" + * - Literal "user*" + - ``field="user\\*"`` + - Matches only "user*" + * - Wildcard search + - ``field=log?`` + - Matches "log1", "logA", "logs" + * - Literal "log?" + - ``field="log\\?"`` + - Matches only "log?"
+ +**Backslash in file paths**:: + + os> search `attributes.error.type`="C:\\\\Users\\\\admin" source=otellogs | fields `attributes.error.type`; + fetched rows / total rows = 1/1 + +-----------------------+ + | attributes.error.type | + |-----------------------| + | C:\Users\admin | + +-----------------------+ + +Note: Each backslash in the search value needs to be escaped with another backslash. When using REST API with JSON, additional JSON escaping is required. + +**Quotes within strings**:: + + os> search body="\"exact phrase\"" source=otellogs | sort @timestamp | fields body | head 1; + fetched rows / total rows = 1/1 + +--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | body | + |--------------------------------------------------------------------------------------------------------------------------------------------------------| + | Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT "exact phrase" wildcard* fuzzy~2 /regex/ [range TO search] | + +--------------------------------------------------------------------------------------------------------------------------------------------------------+ + +**Text with special characters**:: + + os> search "wildcard\\* fuzzy~2" source=otellogs | fields body | head 1; + fetched rows / total rows = 1/1 + +--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | body | + |--------------------------------------------------------------------------------------------------------------------------------------------------------| + | Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT "exact phrase" wildcard* fuzzy~2 /regex/ [range TO search] | + 
+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Example 10: Fetch All Data +---------------------------- + +Retrieve all documents from an index by specifying only the source without any search conditions. This is useful for exploring small datasets or verifying data ingestion. + +:: + + os> source=accounts; + fetched rows / total rows = 4/4 + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ \ No newline at end of file diff --git a/doctest/build.gradle b/doctest/build.gradle index 29ac673429b..d3492b62917 100644 --- a/doctest/build.gradle +++ b/doctest/build.gradle @@ -11,7 +11,6 @@ plugins { id 'base' id 'com.wiredforcode.spawn' id "de.undercouch.download" version "5.3.0" - id 'com.diffplug.spotless' version '6.22.0' } apply plugin: 'opensearch.testclusters' @@ -34,7 +33,7 @@ task cloneSqlCli(type: Exec) { } } -task bootstrap(type: Exec, dependsOn: ['cloneSqlCli', 'spotlessJava']) { +task bootstrap(type: Exec, dependsOn: ['cloneSqlCli']) { inputs.file 
"$projectDir/bootstrap.sh" outputs.dir "$projectDir/.venv" @@ -185,23 +184,6 @@ tasks.register("runRestTestCluster", RunTask) { useCluster testClusters.docTestCluster; } -spotless { - java { - target fileTree('.') { - include '**/*.java' - exclude '**/build/**', '**/build-*/**' - } - importOrder() -// licenseHeader("/*\n" + -// " * Copyright OpenSearch Contributors\n" + -// " * SPDX-License-Identifier: Apache-2.0\n" + -// " */\n\n") - removeUnusedImports() - trimTrailingWhitespace() - endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') - } -} def getJobSchedulerPlugin(String jsPlugin, String bwcOpenSearchJSDownload) { return provider(new Callable() { diff --git a/doctest/test_data/otellogs.json b/doctest/test_data/otellogs.json new file mode 100644 index 00000000000..61576f423c6 --- /dev/null +++ b/doctest/test_data/otellogs.json @@ -0,0 +1,60 @@ +{"index": {"_id": "1"}} +{"@timestamp": "2024-01-15T10:30:00.123456789Z", "time": "2024-01-15T10:30:00.123456789Z", "severityNumber": 9, "severityText": "INFO", "body": "User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart", "traceId": "b3cb01a03c846973fd496b973f49be85", "spanId": "caf311ef949971cb", "flags": 1, "instrumentationScope": {"name": "cart-service", "version": "1.0.0"}, "resource": {"attributes": {"service.name": "cart-service", "service.namespace": "production", "service.version": "2.0.1"}}, "attributes": {"user.id": "e1ce63e6-8501-11f0-930d-c2fcbdc05f14", "product.id": "HQTGWGPNH4", "quantity": 4}} +{"index": {"_id": "2"}} +{"@timestamp": "2024-01-15T10:30:01.234567890Z", "time": "2024-01-15T10:30:01.234567890Z", "severityNumber": 17, "severityText": "ERROR", "body": "Payment failed: Insufficient funds for user@example.com", "traceId": "7475a30207dbef54d29e42c37f09a528", "spanId": "7a35f3b69a2f9a24", "flags": 1, "instrumentationScope": {"name": "payment-service"}, "resource": {"attributes": 
{"service.name": "payment-service", "service.namespace": "production"}}, "attributes": {"error.code": "INSUFFICIENT_FUNDS", "user.email": "user@example.com", "payment.amount": 1500.00}} +{"index": {"_id": "3"}} +{"@timestamp": "2024-01-15T10:30:02.345678901Z", "time": "2024-01-15T10:30:02.345678901Z", "severityNumber": 13, "severityText": "WARN", "body": "Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "search-service"}}, "attributes": {"query.raw": "+field:value -excluded AND (grouped OR terms) NOT \"exact phrase\" wildcard* fuzzy~2", "query.type": "lucene"}} +{"index": {"_id": "4"}} +{"@timestamp": "2024-01-15T10:30:03.456789012Z", "time": "2024-01-15T10:30:03.456789012Z", "severityNumber": 5, "severityText": "DEBUG", "body": "192.168.1.1 - - [15/Jan/2024:10:30:03 +0000] \"GET /api/products?search=laptop&category=electronics HTTP/1.1\" 200 1234 \"-\" \"Mozilla/5.0\"", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "nginx"}}, "attributes": {"http.method": "GET", "http.status_code": 200, "http.url": "/api/products", "client.ip": "192.168.1.1", "http.response_size": 1234}} +{"index": {"_id": "5"}} +{"@timestamp": "2024-01-15T10:30:04.567890123Z", "time": "2024-01-15T10:30:04.567890123Z", "severityNumber": 9, "severityText": "INFO", "body": "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome! Your order #12345 is confirmed'", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "notification-service"}}, "attributes": {"email.to": "john.doe+newsletter@company.com", "email.subject": "Welcome! 
Your order #12345 is confirmed", "order.id": "12345"}} +{"index": {"_id": "6"}} +{"@timestamp": "2024-01-15T10:30:05.678901234Z", "time": "2024-01-15T10:30:05.678901234Z", "severityNumber": 21, "severityText": "FATAL", "body": "Database connection pool exhausted: postgresql://db.example.com:5432/production", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "api-gateway", "host.name": "api-server-01"}}, "attributes": {"db.connection_string": "postgresql://db.example.com:5432/production", "db.pool.size": 100, "db.pool.active": 100}} +{"index": {"_id": "7"}} +{"@timestamp": "2024-01-15T10:30:06.789012345Z", "time": "2024-01-15T10:30:06.789012345Z", "severityNumber": 1, "severityText": "TRACE", "body": "Executing SQL: SELECT * FROM users WHERE email LIKE '%@gmail.com' AND status != 'deleted' ORDER BY created_at DESC", "traceId": "abc123def456ghi789", "spanId": "jkl012mno345", "flags": 1, "resource": {"attributes": {"service.name": "user-service"}}, "attributes": {"db.statement": "SELECT * FROM users WHERE email LIKE '%@gmail.com'", "db.operation": "SELECT", "db.table": "users"}} +{"index": {"_id": "8"}} +{"@timestamp": "2024-01-15T10:30:07.890123456Z", "time": "2024-01-15T10:30:07.890123456Z", "severityNumber": 17, "severityText": "ERROR", "body": "Failed to parse JSON with special characters: {\"key\": \"value with \\\"quotes\\\" and: $@#%^&*()[]{}<>|\\/?\"}", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "parser-service"}}, "attributes": {"error.type": "JSON_PARSE_ERROR", "json.input": "{\"key\": \"value with \\\"quotes\\\" and: $@#%^&*()[]{}<>|\\/?\"}"}} +{"index": {"_id": "9"}} +{"@timestamp": "2024-01-15T10:30:08.901234567Z", "time": "2024-01-15T10:30:08.901234567Z", "severityNumber": 13, "severityText": "WARN", "body": "Potential SQL injection detected: '; DROP TABLE users; -- in search parameter", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": 
"security-scanner"}}, "attributes": {"security.threat": "SQL_INJECTION", "input.suspicious": "'; DROP TABLE users; --", "request.parameter": "search"}} +{"index": {"_id": "10"}} +{"@timestamp": "2024-01-15T10:30:09.012345678Z", "time": "2024-01-15T10:30:09.012345678Z", "severityNumber": 9, "severityText": "INFO", "body": "[2024-01-15 10:30:09] production.INFO: User authentication successful for admin@company.org using OAuth2", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "auth-service"}}, "attributes": {"user.email": "admin@company.org", "auth.method": "OAuth2", "auth.provider": "google"}} +{"index": {"_id": "11"}} +{"@timestamp": "2024-01-15T10:30:10.123456789Z", "time": "2024-01-15T10:30:10.123456789Z", "severityNumber": 2, "severityText": "TRACE2", "body": "Redis command: SETEX user:session:abc123 3600 {\"user_id\":\"456\",\"email\":\"alice@wonderland.net\"}", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "cache-service"}}, "attributes": {"redis.command": "SETEX", "redis.key": "user:session:abc123", "redis.ttl": 3600, "user.email": "alice@wonderland.net"}} +{"index": {"_id": "12"}} +{"@timestamp": "2024-01-15T10:30:11.234567890Z", "time": "2024-01-15T10:30:11.234567890Z", "severityNumber": 18, "severityText": "ERROR2", "body": "Elasticsearch query failed: {\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "search-service"}}, "attributes": {"elasticsearch.query": "{\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}", "error.message": "Wildcards not allowed at start of term"}} +{"index": {"_id": "13"}} +{"@timestamp": "2024-01-15T10:30:12.345678901Z", "time": "2024-01-15T10:30:12.345678901Z", "severityNumber": 6, "severityText": "DEBUG2", "body": "JWT token validated for user test.user@domain.co.uk", "traceId": "", "spanId": "", "flags": 0, 
"resource": {"attributes": {"service.name": "auth-service"}}, "attributes": {"jwt.algorithm": "RS256", "jwt.issuer": "auth.example.com", "user.email": "test.user@domain.co.uk"}} +{"index": {"_id": "14"}} +{"@timestamp": "2024-01-15T10:30:13.456789012Z", "time": "2024-01-15T10:30:13.456789012Z", "severityNumber": 10, "severityText": "INFO2", "body": "Kafka message produced to topic 'user-events' with key 'user-123'", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "event-producer"}}, "attributes": {"kafka.topic": "user-events", "kafka.key": "user-123", "kafka.partition": 3, "kafka.offset": 12345}} +{"index": {"_id": "15"}} +{"@timestamp": "2024-01-15T10:30:14.567890123Z", "time": "2024-01-15T10:30:14.567890123Z", "severityNumber": 14, "severityText": "WARN2", "body": "Rate limit approaching: 450/500 requests for API key ending in ...xyz789", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "api-gateway"}}, "attributes": {"rate_limit.current": 450, "rate_limit.max": 500, "api.key_suffix": "xyz789"}} +{"index": {"_id": "16"}} +{"@timestamp": "2024-01-15T10:30:15.678901234Z", "time": "2024-01-15T10:30:15.678901234Z", "severityNumber": 3, "severityText": "TRACE3", "body": "GraphQL query: { user(email: \"support@helpdesk.io\") { id name orders { id total } } }", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "graphql-service"}}, "attributes": {"graphql.operation": "query", "graphql.field": "user", "user.email": "support@helpdesk.io"}} +{"index": {"_id": "17"}} +{"@timestamp": "2024-01-15T10:30:16.789012345Z", "time": "2024-01-15T10:30:16.789012345Z", "severityNumber": 19, "severityText": "ERROR3", "body": "Failed to send email to multiple recipients: invalid@, not-an-email, missing@.com, @no-local", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "email-service"}}, "attributes": {"email.invalid_recipients": ["invalid@", 
"not-an-email", "missing@.com", "@no-local"], "error.count": 4}} +{"index": {"_id": "18"}} +{"@timestamp": "2024-01-15T10:30:17.890123456Z", "time": "2024-01-15T10:30:17.890123456Z", "severityNumber": 11, "severityText": "INFO3", "body": "Webhook delivered to https://api.partner.com/webhook?token=abc123&email=webhook@partner.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "webhook-service"}}, "attributes": {"webhook.url": "https://api.partner.com/webhook", "webhook.status": "delivered", "webhook.recipient_email": "webhook@partner.com"}} +{"index": {"_id": "19"}} +{"@timestamp": "2024-01-15T10:30:18.901234567Z", "time": "2024-01-15T10:30:18.901234567Z", "severityNumber": 7, "severityText": "DEBUG3", "body": "MongoDB query: db.users.find({\"email\": {\"$regex\": \".*@(gmail|yahoo|hotmail)\\.com$\"}})", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "user-service"}}, "attributes": {"mongodb.collection": "users", "mongodb.operation": "find", "mongodb.filter": "{\"email\": {\"$regex\": \".*@(gmail|yahoo|hotmail)\\.com$\"}}"}} +{"index": {"_id": "20"}} +{"@timestamp": "2024-01-15T10:30:19.012345678Z", "time": "2024-01-15T10:30:19.012345678Z", "severityNumber": 22, "severityText": "FATAL2", "body": "System shutdown initiated: Out of memory error at UserService.java:142", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "user-service", "host.name": "prod-server-01"}}, "attributes": {"error.type": "C:\\Users\\admin", "error.stacktrace": "java.lang.OutOfMemoryError at UserService.java:142"}} +{"index": {"_id": "21"}} +{"@timestamp": "2024-01-15T10:30:20.123456789Z", "time": "2024-01-15T10:30:20.123456789Z", "severityNumber": 15, "severityText": "WARN3", "body": "SSL certificate expiring in 7 days for domain api.example.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "cert-monitor"}}, "attributes": {"ssl.domain": 
"api.example.com", "ssl.days_until_expiry": 7, "ssl.issuer": "Let's Encrypt"}} +{"index": {"_id": "22"}} +{"@timestamp": "2024-01-15T10:30:21.234567890Z", "time": "2024-01-15T10:30:21.234567890Z", "severityNumber": 4, "severityText": "TRACE4", "body": "gRPC call: /UserService/GetUserByEmail {\"email\":\"grpc-user@service.net\"} completed in 45ms", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "grpc-service"}}, "attributes": {"grpc.method": "/UserService/GetUserByEmail", "grpc.status_code": 0, "grpc.request.email": "grpc-user@service.net", "duration_ms": 45}} +{"index": {"_id": "23"}} +{"@timestamp": "2024-01-15T10:30:22.345678901Z", "time": "2024-01-15T10:30:22.345678901Z", "severityNumber": 20, "severityText": "ERROR4", "body": "Failed to process message from queue: Invalid JSON in message body containing email notifications@queue.system", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "queue-processor"}}, "attributes": {"queue.name": "email-notifications", "message.id": "msg-789", "error.email": "notifications@queue.system"}} +{"index": {"_id": "24"}} +{"@timestamp": "2024-01-15T10:30:23.456789012Z", "time": "2024-01-15T10:30:23.456789012Z", "severityNumber": 8, "severityText": "DEBUG4", "body": "Regex pattern matched: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$/ against test.email+tag@sub.domain.example.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "validation-service"}}, "attributes": {"regex.pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$", "input.value": "test.email+tag@sub.domain.example.com", "validation.result": true}} +{"index": {"_id": "25"}} +{"@timestamp": "2024-01-15T10:30:24.567890123Z", "time": "2024-01-15T10:30:24.567890123Z", "severityNumber": 12, "severityText": "INFO4", "body": "Batch job completed: Processed 1000 user records, found 50 emails matching pattern *@deprecated-domain.com", "traceId": "", 
"spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "batch-processor"}}, "attributes": {"batch.total_records": 1000, "batch.matched_records": 50, "batch.email_pattern": "*@deprecated-domain.com"}} +{"index": {"_id": "26"}} +{"@timestamp": "2024-01-15T10:30:25.678901234Z", "time": "2024-01-15T10:30:25.678901234Z", "severityNumber": 16, "severityText": "WARN4", "body": "Memory usage high: 85% used. May affect performance for operations involving email@performance.test", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "monitoring-service", "container.id": "abc123def456"}}, "attributes": {"memory.usage_percent": 85, "memory.total_gb": 16, "alert.email": "email@performance.test"}} +{"index": {"_id": "27"}} +{"@timestamp": "2024-01-15T10:30:26.789012345Z", "time": "2024-01-15T10:30:26.789012345Z", "severityNumber": 23, "severityText": "FATAL3", "body": "Kubernetes pod crashed: email-service-7d4b8c6d9f-x2zkq in namespace production", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "k8s-monitor", "k8s.namespace": "production"}}, "attributes": {"k8s.pod.name": "email-service-7d4b8c6d9f-x2zkq", "k8s.container.restart_count": 5}} +{"index": {"_id": "28"}} +{"@timestamp": "2024-01-15T10:30:27.890123456Z", "time": "2024-01-15T10:30:27.890123456Z", "severityNumber": 24, "severityText": "FATAL4", "body": "Data corruption detected in user table: email column contains invalid data for user_id=999", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "data-integrity-service"}}, "attributes": {"corruption.table": "users", "corruption.column": "email", "corruption.user_id": 999}} +{"index": {"_id": "29"}} +{"@timestamp": "2024-01-15T10:30:28.901234567Z", "time": "2024-01-15T10:30:28.901234567Z", "severityNumber": 9, "severityText": "INFO", "body": "Health check passed for all services including email-service@health.monitor", "traceId": "", "spanId": "", "flags": 
0, "resource": {"attributes": {"service.name": "health-checker"}}, "attributes": {"health.status": "healthy", "health.services_checked": 15, "monitor.email": "email-service@health.monitor"}} +{"index": {"_id": "30"}} +{"@timestamp": "2024-01-15T10:30:29.012345678Z", "time": "2024-01-15T10:30:29.012345678Z", "severityNumber": 9, "severityText": "INFO", "body": "CORS request from origin https://app.example.com to access /api/users/email containing sensitive@data.secure", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "api-gateway"}}, "attributes": {"cors.origin": "https://app.example.com", "cors.method": "POST", "endpoint": "/api/users/email", "data.classification": "sensitive@data.secure"}} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 636902f6390..041bb38a5d2 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -40,7 +40,8 @@ 'occupation': 'occupation.json', 'worker': 'worker.json', 'work_information': 'work_information.json', - 'events': 'events.json' + 'events': 'events.json', + 'otellogs': 'otellogs.json' } DEBUG_MODE = os.environ.get('DOCTEST_DEBUG', 'false').lower() == 'true' diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExistsSubqueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExistsSubqueryIT.java index 72bcad9e6c6..e21f828b210 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExistsSubqueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExistsSubqueryIT.java @@ -83,7 +83,7 @@ public void testSimpleExistsSubqueryInFilter() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s exists [" + "source = %s | where exists [" + " source = %s | where id = uid" + " ]" + "| sort - salary" @@ -120,7 +120,7 @@ public void testNotExistsSubqueryInFilter() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s not 
exists [" + "source = %s | where not exists [" + " source = %s | where id = uid" + " ]" + "| sort - salary" diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLInSubqueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLInSubqueryIT.java index ad8f8d4a890..ca97533b4ac 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLInSubqueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLInSubqueryIT.java @@ -90,7 +90,7 @@ public void testFilterInSubquery() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s id in [" + "source = %s | where id in [" + " source = %s | fields uid" + " ]" + "| sort - salary" @@ -118,17 +118,7 @@ public void testInSubqueryWithParentheses() throws IOException { + "| sort - salary" + "| fields id, name, salary", TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION)); - JSONObject result2 = - executeQuery( - String.format( - "source = %s (id) in [" - + " source = %s | fields uid" - + " ]" - + "| sort - salary" - + "| fields id, name, salary", - TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION)); verifySchema(result1, schema("id", "int"), schema("name", "string"), schema("salary", "int")); - verifySchema(result2, schema("id", "int"), schema("name", "string"), schema("salary", "int")); verifyDataRowsInOrder( result1, rows(1002, "John", 120000), @@ -136,13 +126,6 @@ public void testInSubqueryWithParentheses() throws IOException { rows(1000, "Jake", 100000), rows(1005, "Jane", 90000), rows(1006, "Tommy", 30000)); - verifyDataRowsInOrder( - result2, - rows(1002, "John", 120000), - rows(1003, "David", 120000), - rows(1000, "Jake", 100000), - rows(1005, "Jane", 90000), - rows(1006, "Tommy", 30000)); } @Test @@ -187,7 +170,7 @@ public void testFilterNotInSubquery() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s id not in [" + "source = %s | where id not in [" + " source = %s 
| fields uid" + " ]" + "| sort - salary" @@ -219,7 +202,7 @@ public void testEmptyInSubquery() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s id not in [" + "source = %s | where id not in [" + " source = %s | where uid = 0000 | fields uid" + " ]" + "| sort - salary" diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLScalarSubqueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLScalarSubqueryIT.java index 2c53fbd47ca..4aac43580f3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLScalarSubqueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLScalarSubqueryIT.java @@ -107,7 +107,7 @@ public void testUncorrelatedScalarSubqueryInSelectAndInFilter() throws IOExcepti JSONObject result = executeQuery( String.format( - "source = %s id > [ source = %s | stats count(department) ] + 999" + "source = %s | where id > [ source = %s | stats count(department) ] + 999" + "| eval count_dept = [" + " source = %s | stats count(department)" + " ]" @@ -193,7 +193,7 @@ public void testCorrelatedScalarSubqueryInFilter() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s id = [ source = %s | where id = uid | stats max(uid) ]" + "source = %s | where id = [ source = %s | where id = uid | stats max(uid) ]" + "| fields id, name", TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION)); verifySchema(result, schema("id", "int"), schema("name", "string")); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index ffb13792b8a..bcfed3ff3da 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -6,51 +6,7 @@ package org.opensearch.sql.legacy; import static 
com.google.common.base.Strings.isNullOrEmpty; -import static org.opensearch.sql.legacy.TestUtils.createIndexByRestClient; -import static org.opensearch.sql.legacy.TestUtils.getAccountIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getAliasIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getArrayIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getBankIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getBankWithNullValuesIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getBig5MappingFile; -import static org.opensearch.sql.legacy.TestUtils.getClickBenchMappingFile; -import static org.opensearch.sql.legacy.TestUtils.getDataTypeNonnumericIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDataTypeNumericIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDateIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDateTimeIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDeepNestedIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDogIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDogs2IndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDogs3IndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getDuplicationNullableIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getGeoIpIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getHdfsLogsIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getHobbiesIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getJsonTestIndexMapping; -import static 
org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getLogsIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getMappingFile; -import static org.opensearch.sql.legacy.TestUtils.getNestedSimpleIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getNestedTypeIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getOccupationIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getOdbcIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getOrderIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getPeople2IndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getPhraseIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getResponseBody; -import static org.opensearch.sql.legacy.TestUtils.getStateCountryIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getStringIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getTpchMappingFile; -import static org.opensearch.sql.legacy.TestUtils.getUnexpandedObjectIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getWeblogsIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getWorkInformationIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getWorkerIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.isIndexExist; -import static org.opensearch.sql.legacy.TestUtils.loadDataByRestClient; +import static org.opensearch.sql.legacy.TestUtils.*; import static org.opensearch.sql.legacy.plugin.RestSqlAction.CURSOR_CLOSE_ENDPOINT; import static org.opensearch.sql.legacy.plugin.RestSqlAction.EXPLAIN_API_ENDPOINT; import static org.opensearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; @@ -901,6 +857,11 @@ public enum Index { "logs", getLogsIndexMapping(), "src/test/resources/logs.json"), + OTELLOGS( + TestsConstants.TEST_INDEX_OTEL_LOGS, + "otel_logs", + getOtelLogsIndexMapping(), + 
"src/test/resources/otellogs.json"), TIME_TEST_DATA( "opensearch-sql_test_index_time_data", "time_data", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 6f605837196..a93cc1f1b50 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -310,6 +310,11 @@ public static String getLogsIndexMapping() { return getMappingFile(mappingFile); } + public static String getOtelLogsIndexMapping() { + String mappingFile = "otellogs_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index fd017d3ee61..a9f166dc466 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -80,6 +80,7 @@ public class TestsConstants { public static final String TEST_INDEX_ARRAY = TEST_INDEX + "_array"; public static final String TEST_INDEX_HDFS_LOGS = TEST_INDEX + "_hdfs_logs"; public static final String TEST_INDEX_LOGS = TEST_INDEX + "_logs"; + public static final String TEST_INDEX_OTEL_LOGS = TEST_INDEX + "_otel_logs"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 6b8bba65cbb..9aedfaeea1f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -8,6 +8,7 @@ import static org.hamcrest.Matchers.containsString; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.assertJsonEqualsIgnoreId; @@ -28,6 +29,7 @@ public void init() throws Exception { loadIndex(Index.BANK); loadIndex(Index.DATE_FORMATS); loadIndex(Index.WEBLOG); + loadIndex(Index.OTELLOGS); } @Test @@ -638,4 +640,35 @@ protected String loadExpectedPlan(String fileName) throws IOException { } return loadFromFile(prefix + fileName); } + + // Search command explain examples - 3 core use cases + + @Test + public void testExplainSearchBasicText() throws IOException { + // Example 1: Basic text search without field specification + String expected = loadExpectedPlan("explain_search_basic_text.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString(String.format("search source=%s ERROR", TEST_INDEX_OTEL_LOGS))); + } + + @Test + public void testExplainSearchNumericComparison() throws IOException { + // Example 2: Numeric field comparison with greater than + String expected = loadExpectedPlan("explain_search_numeric_comparison.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + String.format("search source=%s severityNumber>15", TEST_INDEX_OTEL_LOGS))); + } + + @Test + public void testExplainSearchWildcardStar() throws IOException { + // Example 3: Wildcard search with asterisk for pattern matching + String expected = loadExpectedPlan("explain_search_wildcard_star.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + String.format("search source=%s severityText=ERR*", TEST_INDEX_OTEL_LOGS))); + } } diff --git 
a/integ-test/src/test/java/org/opensearch/sql/ppl/OperatorIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/OperatorIT.java index 8d022f42791..7faaac19cec 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/OperatorIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/OperatorIT.java @@ -13,7 +13,6 @@ import java.io.IOException; import org.json.JSONObject; import org.junit.jupiter.api.Test; -import org.opensearch.client.ResponseException; public class OperatorIT extends PPLIntegTestCase { @Override @@ -155,7 +154,12 @@ public void testEqualOperator() throws IOException { executeQuery(String.format("source=%s age = 32 | fields age", TEST_INDEX_BANK)); verifyDataRows(result, rows(32)); - result = executeQuery(String.format("source=%s 32 = age | fields age", TEST_INDEX_BANK)); + result = + executeQuery(String.format("source=%s | where age = 32 | fields age", TEST_INDEX_BANK)); + verifyDataRows(result, rows(32)); + + result = + executeQuery(String.format("source=%s | where 32 = age | fields age", TEST_INDEX_BANK)); verifyDataRows(result, rows(32)); } @@ -165,7 +169,12 @@ public void testNotEqualOperator() throws IOException { executeQuery(String.format("source=%s age != 32 | fields age", TEST_INDEX_BANK)); verifyDataRows(result, rows(28), rows(33), rows(34), rows(36), rows(36), rows(39)); - result = executeQuery(String.format("source=%s 32 != age | fields age", TEST_INDEX_BANK)); + result = + executeQuery(String.format("source=%s | where age != 32 | fields age", TEST_INDEX_BANK)); + verifyDataRows(result, rows(28), rows(33), rows(34), rows(36), rows(36), rows(39)); + + result = + executeQuery(String.format("source=%s | where 32 != age | fields age", TEST_INDEX_BANK)); verifyDataRows(result, rows(28), rows(33), rows(34), rows(36), rows(36), rows(39)); } @@ -202,7 +211,7 @@ public void testLikeFunction() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s like(firstname, 'Hatti_') | fields firstname", 
TEST_INDEX_BANK)); + "source=%s | where like(firstname, 'Hatti_') | fields firstname", TEST_INDEX_BANK)); verifyDataRows(result, rows("Hattie")); } @@ -224,22 +233,4 @@ public void testBinaryPredicateWithMissingValue() throws IOException { TEST_INDEX_BANK_WITH_NULL_VALUES)); verifyDataRows(result, rows(48086)); } - - private void queryExecutionShouldThrowExceptionDueToNullOrMissingValue( - String query, String... errorMsgs) { - try { - executeQuery(query); - fail( - "Expected to throw ExpressionEvaluationException, but none was thrown for query: " - + query); - } catch (ResponseException e) { - String errorMsg = e.getMessage(); - assertTrue(errorMsg.contains("ExpressionEvaluationException")); - for (String msg : errorMsgs) { - assertTrue(errorMsg.contains(msg)); - } - } catch (IOException e) { - throw new IllegalStateException("Unexpected exception raised for query: " + query); - } - } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java index 44703ec1d17..da32d3a79b9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java @@ -24,6 +24,7 @@ public void init() throws Exception { super.init(); loadIndex(Index.BANK); loadIndex(Index.DOG); + loadIndex(Index.OTELLOGS); } @Test @@ -67,4 +68,853 @@ public void searchCommandWithoutSourceShouldFailToParse() throws IOException { assertTrue(e.getMessage().contains(SYNTAX_EX_MSG_FRAGMENT)); } } + + @Test + public void testSearchWithFieldEquals() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s `resource.attributes.service.name`=\\\"cart-service\\\" | fields" + + " body ", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows("User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart")); + } + + @Test + public void testSearchWithFieldNotEquals() throws 
IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityText!=\\\"INFO\\\" | sort time | fields severityText |" + " head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, rows("ERROR"), rows("WARN"), rows("DEBUG"), rows("FATAL"), rows("TRACE")); + } + + @Test + public void testSearchWithNumericComparison() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityNumber>15 AND severityNumber<=20 | sort time | fields" + " severityNumber, severityText", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows(17, "ERROR"), + rows(17, "ERROR"), + rows(18, "ERROR2"), + rows(19, "ERROR3"), + rows(20, "ERROR4"), + rows(16, "WARN4")); + } + + // ===== Boolean Operator Tests ===== + + @Test + public void testSearchWithOROperator() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityText=\\\"ERROR\\\" OR severityText=\\\"FATAL\\\" | sort" + " time | fields severityText | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows("ERROR"), rows("FATAL"), rows("ERROR")); + } + + @Test + public void testSearchWithANDOperator() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityText=\\\"INFO\\\" AND" + " `resource.attributes.service.name`=\\\"cart-service\\\" | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows("User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart")); + } + + @Test + public void testDifferenceBetweenNOTAndNotEquals() throws IOException { + // Demonstrate the key difference between != and NOT: + // != requires the field to exist and not equal the value + // NOT field=value matches docs where the field differs from the value OR doesn't exist + + JSONObject resultNotEquals = + executeQuery( + String.format( + "search source=%s `attributes.http.status_code`!=200 | sort time | fields body |" + " head 5", +
TEST_INDEX_OTEL_LOGS)); + // This should return documents where http.status_code exists but is not 200 + verifyDataRows(resultNotEquals); + + JSONObject resultNOT = + executeQuery( + String.format( + "search source=%s NOT `attributes.http.status_code`=200 | sort time | fields body |" + + " head 5", + TEST_INDEX_OTEL_LOGS)); + // This returns docs where http.status_code != 200 OR where http.status_code doesn't exist at + // all + verifyDataRows( + resultNOT, + rows("User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart"), + rows("Payment failed: Insufficient funds for user@example.com"), + rows( + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]"), + rows( + "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome!" + + " Your order #12345 is confirmed'"), + rows("Database connection pool exhausted: postgresql://db.example.com:5432/production")); + + // Test 3: Demonstrate with another optional field - attributes.user.email + // != only returns docs where user.email exists and is not the specified value + JSONObject resultEmailNotEquals = + executeQuery( + String.format( + "search source=%s `attributes.user.email`!=\\\"user@example.com\\\" | sort time |" + + " fields body | head 2", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + resultEmailNotEquals, + rows( + "[2024-01-15 10:30:09] production.INFO: User authentication successful for" + + " admin@company.org using OAuth2"), + rows( + "Redis command: SETEX user:session:abc123 3600" + + " {\"user_id\":\"456\",\"email\":\"alice@wonderland.net\"}")); + + // NOT returns all docs except where user.email="user@example.com" + // Including docs that don't have user.email field at all + JSONObject resultEmailNOT = + executeQuery( + String.format( + "search source=%s NOT `attributes.user.email`=\\\"user@example.com\\\" | sort time" + + " | fields body | head 5", + 
TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + resultEmailNOT, + rows("User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart"), + rows( + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]"), + rows( + "192.168.1.1 - - [15/Jan/2024:10:30:03 +0000] \"GET" + + " /api/products?search=laptop&category=electronics HTTP/1.1\" 200 1234 \"-\"" + + " \"Mozilla/5.0\""), + rows( + "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome!" + + " Your order #12345 is confirmed'"), + rows("Database connection pool exhausted: postgresql://db.example.com:5432/production")); + } + + @Test + public void testSearchWithComplexBooleanExpression() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s (severityText=\\\"ERROR\\\" OR severityText=\\\"WARN\\\") AND" + + " severityNumber>10 | sort time | fields severityText, severityNumber | head" + + " 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, rows("ERROR", 17), rows("WARN", 13), rows("ERROR", 17), rows("WARN", 13)); + } + + @Test + public void testSearchWithNestedParentheses() throws IOException { + JSONObject resultWithParentheses = + executeQuery( + String.format( + "search source=%s ((severityNumber<15 AND severityNumber>5) OR (severityNumber>20))" + + " | sort time | fields severityNumber | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(resultWithParentheses, rows(9), rows(13), rows(9), rows(21), rows(13)); + + JSONObject resultWithoutParentheses = + executeQuery( + String.format( + "search source=%s severityNumber<15 AND severityNumber>5 OR severityNumber>20" + + " | sort time | fields severityNumber | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(resultWithoutParentheses, rows(9), rows(13), rows(9), rows(13), rows(9)); + + JSONObject resultDifferentParentheses = + executeQuery( + String.format( + "search 
source=%s severityNumber<15 AND (severityNumber>5 OR severityNumber>20)" + + " | sort time | fields severityNumber | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(resultDifferentParentheses, rows(9), rows(13), rows(9), rows(13), rows(9)); + } + + // ===== IN Operator Tests ===== + + @Test + public void testSearchWithINOperator() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityText IN (\\\"ERROR\\\", \\\"WARN\\\", \\\"FATAL\\\") |" + + " sort time | fields severityText | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows("ERROR"), rows("WARN"), rows("FATAL"), rows("ERROR"), rows("WARN")); + } + + // ===== Free Text Search Tests ===== + + @Test + public void testSearchWithFreeText() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s \\\"@example.com\\\" | fields body | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows("Payment failed: Insufficient funds for user@example.com"), + rows( + "Elasticsearch query failed:" + + " {\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}")); + } + + @Test + public void testSearchWithPhraseSearch() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s \\\"Payment failed\\\" | fields body", TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows("Payment failed: Insufficient funds for user@example.com")); + } + + @Test + public void testSearchWithMultipleFreeTextTerms() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s \\\"email\\\" \\\"user\\\" | sort time | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows( + "GraphQL query: { user(email: \"support@helpdesk.io\") { id name orders { id total } }" + + " }"), + rows( + "gRPC call: /UserService/GetUserByEmail {\"email\":\"grpc-user@service.net\"} completed" + + " in 45ms"), + rows( + "Data corruption detected in 
user table: email column contains invalid data for" + + " user_id=999")); + } + + // ===== Edge Cases with Special Characters ===== + + @Test + public void testSearchWithEmailInBody() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s \\\"john.doe+newsletter@company.com\\\" | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows( + "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome!" + + " Your order #12345 is confirmed'")); + } + + @Test + public void testSearchWithLuceneSpecialCharacters() throws IOException { + // Test that special characters in quoted strings are searched literally + JSONObject result = + executeQuery( + String.format( + "search source=%s \\\"wildcard* fuzzy~2\\\" | fields body", TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows( + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]")); + } + + @Test + public void testWildcardPatternMatching() throws IOException { + // Test wildcard pattern matching on different field types + // * matches zero or more characters + // ? 
matches exactly one character + + // Test 1: Wildcard on keyword field (severityText) + // Search for severity levels starting with "ERR" + JSONObject keywordWildcard = + executeQuery( + String.format( + "search source=%s severityText=ERR* | sort time | fields severityText, body | head" + + " 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + keywordWildcard, + rows("ERROR", "Payment failed: Insufficient funds for user@example.com"), + rows( + "ERROR", + "Failed to parse JSON with special characters: {\"key\": \"value with \\\"quotes\\\"" + + " and: $@#%^&*()[]{}<>|\\/?\"}"), + rows( + "ERROR2", + "Elasticsearch query failed:" + + " {\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}"), + rows( + "ERROR3", + "Failed to send email to multiple recipients: invalid@, not-an-email, missing@.com," + + " @no-local"), + rows( + "ERROR4", + "Failed to process message from queue: Invalid JSON in message body containing email" + + " notifications@queue.system")); + + // Test 2: Single character wildcard with ? + // ? matches exactly one character + // INFO? matches INFO2, INFO3, INFO4 (5 characters total) + // INF? 
would match any 4-character string starting with "INF" + JSONObject singleCharWildcard = + executeQuery( + String.format( + "search source=%s severityText=\\\"INFO?\\\" | sort time | fields severityText," + + " body | head 3", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + singleCharWildcard, + rows("INFO2", "Kafka message produced to topic 'user-events' with key 'user-123'"), + rows( + "INFO3", + "Webhook delivered to" + + " https://api.partner.com/webhook?token=abc123&email=webhook@partner.com"), + rows( + "INFO4", + "Batch job completed: Processed 1000 user records, found 50 emails matching pattern" + + " *@deprecated-domain.com")); + + // Test 3: Wildcard on text field (body) + // Search for any log whose body contains a term starting with "user" + JSONObject textFieldWildcard = + executeQuery( + String.format( + "search source=%s body=\\\"user*\\\" | sort time | fields body | head 3", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + textFieldWildcard, + rows("User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart"), + rows("Payment failed: Insufficient funds for user@example.com"), + rows( + "Executing SQL: SELECT * FROM users WHERE email LIKE '%@gmail.com' AND status !=" + + " 'deleted' ORDER BY created_at DESC")); + + // Test 4: Free text search with wildcards (no field specified) + // Search for words starting with "fail" + JSONObject freeTextWildcard = + executeQuery( + String.format( + "search source=%s fail* | sort time | fields body | head 3", TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + freeTextWildcard, + rows("Payment failed: Insufficient funds for user@example.com"), + rows( + "Failed to parse JSON with special characters: {\"key\": \"value with \\\"quotes\\\"" + + " and: $@#%^&*()[]{}<>|\\/?\"}"), + rows( + "Elasticsearch query failed:" + + " {\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}")); + + // Test 5: Complex wildcard patterns on service names + // Search for service names ending with 
"-service" + JSONObject serviceWildcard = + executeQuery( + String.format( + "search source=%s `resource.attributes.service.name`=*-service | sort time | fields" + + " `resource.attributes.service.name`, body | head 4", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + serviceWildcard, + rows( + "cart-service", + "User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart"), + rows("payment-service", "Payment failed: Insufficient funds for user@example.com"), + rows( + "search-service", + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]"), + rows( + "notification-service", + "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome!" + + " Your order #12345 is confirmed'")); + + // Test 6: Combining wildcards with other operators + JSONObject combinedWildcard = + executeQuery( + String.format( + "search source=%s severityText=ERR* AND severityNumber>16 | sort time | fields" + + " severityText, severityNumber | head 3", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(combinedWildcard, rows("ERROR", 17), rows("ERROR", 17), rows("ERROR2", 18)); + } + + @Test + public void testWildcardEscaping() throws IOException { + // Test escaping wildcards to search for literal * and ? 
characters + // The test data contains: "wildcard* fuzzy~2" as literal text + + // Test 1: Unescaped * is a wildcard - matches "wildcard" followed by anything + JSONObject wildcardSearch = + executeQuery( + String.format( + "search source=%s wildcard* | sort time | fields body, `attributes.error.message` |" + + " head 3", + TEST_INDEX_OTEL_LOGS)); + // Should match any document containing words starting with "wildcard" + verifyDataRows( + wildcardSearch, + rows( + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]", + null), + rows( + "Elasticsearch query failed:" + + " {\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}", + "Wildcards not allowed at start of term")); + + // Test 2: Escaped * searches for literal asterisk + // To search for literal "wildcard*", need to escape the asterisk + JSONObject literalAsterisk = + executeQuery( + String.format( + "search source=%s \\\"wildcard\\\\*\\\" | fields body, `attributes.error.message`", + TEST_INDEX_OTEL_LOGS)); + // Should match the document containing literal "wildcard*" + verifyDataRows( + literalAsterisk, + rows( + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]", + null)); + + // Test 3: Search for paths with backslashes (like Windows paths) + // The test data contains: "C:\\Users\\admin" + // lucene escaping -> \\\\ + // rest request escaping -> \\\\\\\\ + // java string escaping -> \\\\\\\\\\\\\\\\ + JSONObject backslashSearch = + executeQuery( + String.format( + "search source=%s" + + " `attributes.error.type`=\\\"C:\\\\\\\\\\\\\\\\Users\\\\\\\\\\\\\\\\admin\\\"" + + " | sort time | fields attributes.error.type", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(backslashSearch, rows("C:\\Users\\admin")); + } + + @Test + public void testSearchWithSQLInjectionPattern() throws 
IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s \\\"DROP TABLE users\\\" | fields body", TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows("Potential SQL injection detected: '; DROP TABLE users; -- in search parameter")); + } + + @Test + public void testSearchWithJSONSpecialChars() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s\\\"quotes\\\\\\\" and: $@#\\\" | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows( + "Failed to parse JSON with special characters: {\"key\": \"value with \\\"quotes\\\"" + + " and: $@#%^&*()[]{}<>|\\/?\"}")); + } + + @Test + public void testSearchWithIPAddress() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s `attributes.client.ip`=\\\"192.168.1.1\\\" | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows( + "192.168.1.1 - - [15/Jan/2024:10:30:03 +0000] \"GET" + + " /api/products?search=laptop&category=electronics HTTP/1.1\" 200 1234 \"-\"" + + " \"Mozilla/5.0\"")); + } + + // ===== Complex Mixed Queries ===== + + @Test + public void testSearchMixedWithPipeCommands() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityNumber>10 | where severityText != \\\"INFO\\\" | sort" + + " time | fields severityText, body | head 5", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows("ERROR", "Payment failed: Insufficient funds for user@example.com"), + rows( + "WARN", + "Query contains Lucene special characters: +field:value -excluded AND (grouped OR" + + " terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]"), + rows( + "FATAL", + "Database connection pool exhausted: postgresql://db.example.com:5432/production"), + rows( + "ERROR", + "Failed to parse JSON with special characters: {\"key\": \"value with \\\"quotes\\\"" + + " and: $@#%^&*()[]{}<>|\\/?\"}"), + rows( + "WARN", + 
"Potential SQL injection detected: '; DROP TABLE users; -- in search parameter")); + } + + @Test + public void testSearchWithMultipleFieldTypes() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityText=\\\"ERROR\\\" AND severityNumber=17 | fields" + + " severityText, severityNumber | head 2", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows("ERROR", 17), rows("ERROR", 17)); + } + + // ===== Attribute Field Searches ===== + + @Test + public void testSearchWithAttributeFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s `attributes.http.status_code`=200 | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows( + "192.168.1.1 - - [15/Jan/2024:10:30:03 +0000] \"GET" + + " /api/products?search=laptop&category=electronics HTTP/1.1\" 200 1234 \"-\"" + + " \"Mozilla/5.0\"")); + } + + @Test + public void testSearchWithNestedEmailAttribute() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s `attributes.user.email`=\\\"user@example.com\\\" | fields body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows("Payment failed: Insufficient funds for user@example.com")); + } + + // ===== Range Query Edge Cases ===== + + @Test + public void testSearchWithInclusiveRanges() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityNumber>=9 AND severityNumber<=10 | sort time | fields" + + " severityNumber, body | head 6", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows(9, "User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart"), + rows( + 9, + "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome!" 
+ + " Your order #12345 is confirmed'"), + rows( + 9, + "[2024-01-15 10:30:09] production.INFO: User authentication successful for" + + " admin@company.org using OAuth2"), + rows(10, "Kafka message produced to topic 'user-events' with key 'user-123'"), + rows(9, "Health check passed for all services including email-service@health.monitor"), + rows( + 9, + "CORS request from origin https://app.example.com to access /api/users/email containing" + + " sensitive@data.secure")); + } + + @Test + public void testSearchWithImpossibleRange() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityNumber>30 AND severityNumber<5", TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result); + } + + // ===== IN Operator Edge Cases ===== + + @Test + public void testSearchWithEmptyINList() throws IOException { + JSONObject result = + executeQuery( + String.format("search source=%s severityNumber IN (999)", TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result); + } + + @Test + public void testSearchWithSingleValueIN() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityText IN (\\\"ERROR\\\") | fields severityText | head 3", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows("ERROR"), rows("ERROR")); + // search with quotes included. 
+ result = + executeQuery( + String.format( + "search source=%s severityText IN (\\\"\\\\\\\"ERROR\\\\\\\"\\\") | fields" + + " severityText | head 3", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result); + } + + @Test + public void testSearchWithUpperCaseValue() throws IOException { + JSONObject result = + executeQuery( + String.format("search source=%s severityText=\\\"NOTEXIST\\\"", TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result); + } + + @Test + public void testSearchWithInvalidFieldName() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s nonexistent_field=\\\"value\\\"", TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result); + } + + @Test + public void testSearchWithTypeMismatch() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s severityNumber=\\\"not-a-number\\\"", TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result); + } + + @Test + public void testSearchWithDoubleFieldComparisons() throws IOException { + // Test 1: Exact match with decimal notation + JSONObject exactMatch = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`=1500.0 | fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(exactMatch, rows(1500.0)); + + // Test 2: Double with 'd' suffix + JSONObject doubleSuffix = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`=1500.0d | fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(doubleSuffix, rows(1500.0)); + + // Test 3: Float with 'f' suffix + JSONObject floatSuffix = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`=1500.0f | fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(floatSuffix, rows(1500.0)); + + // Test 4: Integer value on double field (no decimal) + JSONObject integerValue = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`=1500 
| fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(integerValue, rows(1500.0)); + } + + @Test + public void testSearchWithDoubleRangeOperators() throws IOException { + // Test 1: Greater than + JSONObject greaterThan = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`>1000.0 | fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(greaterThan, rows(1500.0)); + + // Test 2: Greater or equal + JSONObject greaterOrEqual = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`>=1500.0 | fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(greaterOrEqual, rows(1500.0)); + + // Test 3: Range query with AND + JSONObject rangeQuery = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`>=1000.0 AND" + + " `attributes.payment.amount`<=2000.0 | fields `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(rangeQuery, rows(1500.0)); + + // Test 4: Not equals (should return no results as we only have 1500.0) + JSONObject notEquals = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount`!=1500.0 | fields" + + " `attributes.payment.amount`", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(notEquals); + } + + @Test + public void testSearchWithDoubleINOperator() throws IOException { + // Test double with IN operator + JSONObject result = + executeQuery( + String.format( + "search source=%s `attributes.payment.amount` IN (1000.0, 1500.0, 2000.0) | fields" + + " `attributes.payment.amount`, body", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows(result, rows(1500.0, "Payment failed: Insufficient funds for user@example.com")); + } + + @Test + public void testSearchWithDateFormats() throws IOException { + // Test 1: Full timestamp with nanoseconds + JSONObject fullTimestamp = + executeQuery( + String.format( + "search source=%s 
@timestamp=\\\"2024-01-15T10:30:00.123456789Z\\\" | sort" + + " @timestamp | fields @timestamp, body | head 1", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + fullTimestamp, + rows( + "2024-01-15 10:30:00.123456789", + "User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart")); + + // Test 2: Date only format (matches all records from that date) + JSONObject dateOnly = + executeQuery( + String.format( + "search source=%s @timestamp=\\\"2024-01-15\\\" | sort @timestamp | fields" + + " @timestamp | head 3", + TEST_INDEX_OTEL_LOGS)); + // Should match multiple records from 2024-01-15 + verifyDataRows( + dateOnly, + rows("2024-01-15 10:30:00.123456789"), + rows("2024-01-15 10:30:01.23456789"), + rows("2024-01-15 10:30:02.345678901")); + + // Test 3: Timestamp without nanoseconds + JSONObject timestampNoNanos = + executeQuery( + String.format( + "search source=%s @timestamp=\\\"2024-01-15T10:30:01\\\" | sort @timestamp | fields" + + " @timestamp | head 2", + TEST_INDEX_OTEL_LOGS)); + // Should match records at that second + verifyDataRows(timestampNoNanos, rows("2024-01-15 10:30:01.23456789")); + } + + @Test + public void testSearchWithDateRangeComparisons() throws IOException { + // Test 1: Greater than - finds records after specified time + JSONObject greaterThan = + executeQuery( + String.format( + "search source=%s @timestamp>\\\"2024-01-15T10:30:00Z\\\" | sort @timestamp |" + + " fields @timestamp | head 3", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + greaterThan, + rows("2024-01-15 10:30:01.23456789"), + rows("2024-01-15 10:30:02.345678901"), + rows("2024-01-15 10:30:03.456789012")); + + // Test 2: Less than or equal - finds records up to specified time + JSONObject lessOrEqual = + executeQuery( + String.format( + "search source=%s @timestamp<=\\\"2024-01-15T10:30:01Z\\\" | sort @timestamp |" + + " fields @timestamp | head 2", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + lessOrEqual, rows("2024-01-15 10:30:00.123456789"), 
rows("2024-01-15 10:30:01.23456789")); + + // Test 3: Date range with AND - finds records within time window + JSONObject dateRange = + executeQuery( + String.format( + "search source=%s @timestamp>=\\\"2024-01-15T10:30:00Z\\\" AND" + + " @timestamp<\\\"2024-01-15T10:30:05Z\\\" | sort @timestamp | fields" + + " @timestamp | head 5", + TEST_INDEX_OTEL_LOGS)); + // Should return records within that 5 second window + verifyDataRows( + dateRange, + rows("2024-01-15 10:30:00.123456789"), + rows("2024-01-15 10:30:01.23456789"), + rows("2024-01-15 10:30:02.345678901"), + rows("2024-01-15 10:30:03.456789012"), + rows("2024-01-15 10:30:04.567890123")); + // Test 4: Not equals (rarely used for timestamps but should work) + JSONObject notEquals = + executeQuery( + String.format( + "search source=%s @timestamp!=\\\"2024-01-15T10:30:00.123456789Z\\\" | sort" + + " @timestamp | fields @timestamp | head 3", + TEST_INDEX_OTEL_LOGS)); + // Should return all other records + verifyDataRows( + notEquals, + rows("2024-01-15 10:30:01.23456789"), + rows("2024-01-15 10:30:02.345678901"), + rows("2024-01-15 10:30:03.456789012")); + } + + @Test + public void testSearchWithDateINOperator() throws IOException { + // Test date field with IN operator for specific timestamps + JSONObject result = + executeQuery( + String.format( + "search source=%s @timestamp IN (\\\"2024-01-15T10:30:00.123456789Z\\\"," + + " \\\"2024-01-15T10:30:01.234567890Z\\\") | sort @timestamp | fields" + + " @timestamp, severityText", + TEST_INDEX_OTEL_LOGS)); + verifyDataRows( + result, + rows("2024-01-15 10:30:00.123456789", "INFO"), + rows("2024-01-15 10:30:01.23456789", "ERROR")); + } } diff --git a/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_can_match_shortcut.ppl b/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_can_match_shortcut.ppl index c0ca2fcd093..aab85fb7c1b 100644 --- a/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_can_match_shortcut.ppl +++ 
b/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_can_match_shortcut.ppl @@ -1,3 +1,3 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | sort + `@timestamp` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_no_can_match_shortcut.ppl b/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_no_can_match_shortcut.ppl index c0ca2fcd093..aab85fb7c1b 100644 --- a/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_no_can_match_shortcut.ppl +++ b/integ-test/src/test/resources/big5/queries/asc_sort_timestamp_no_can_match_shortcut.ppl @@ -1,3 +1,3 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | sort + `@timestamp` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_can_match_shortcut.ppl b/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_can_match_shortcut.ppl index fabaae912e0..84205ef61fc 100644 --- a/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_can_match_shortcut.ppl +++ b/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_can_match_shortcut.ppl @@ -1,3 +1,3 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | sort - `@timestamp` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_no_can_match_shortcut.ppl b/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_no_can_match_shortcut.ppl index fabaae912e0..84205ef61fc 100644 --- a/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_no_can_match_shortcut.ppl +++ b/integ-test/src/test/resources/big5/queries/desc_sort_timestamp_no_can_match_shortcut.ppl @@ -1,3 +1,3 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | sort - `@timestamp` | head 10 \ No newline at end of file diff --git 
a/integ-test/src/test/resources/big5/queries/keyword_in_range.ppl b/integ-test/src/test/resources/big5/queries/keyword_in_range.ppl index 56f07779432..1c717e8472c 100644 --- a/integ-test/src/test/resources/big5/queries/keyword_in_range.ppl +++ b/integ-test/src/test/resources/big5/queries/keyword_in_range.ppl @@ -1,4 +1,4 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/query_string_on_message.ppl b/integ-test/src/test/resources/big5/queries/query_string_on_message.ppl index 730600c1280..2f0d33a6a3b 100644 --- a/integ-test/src/test/resources/big5/queries/query_string_on_message.ppl +++ b/integ-test/src/test/resources/big5/queries/query_string_on_message.ppl @@ -1,2 +1,2 @@ -source = big5 query_string(['message'], 'shield AND carp AND shark') +source = big5 | where query_string(['message'], 'shield AND carp AND shark') | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered.ppl b/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered.ppl index 8f00176fa75..3abd5cea089 100644 --- a/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered.ppl +++ b/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered.ppl @@ -1,4 +1,4 @@ -source = big5 query_string(['message'], 'shield carp shark', default_operator='AND') +source = big5 message=shield message=carp message=shark | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered_sorted_num.ppl b/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered_sorted_num.ppl index 566a2ef8413..8baf49c987c 100644 --- 
a/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered_sorted_num.ppl +++ b/integ-test/src/test/resources/big5/queries/query_string_on_message_filtered_sorted_num.ppl @@ -1,4 +1,4 @@ -source = big5 query_string(['message'], 'shield AND carp AND shark') +source = big5 | where query_string(['message'], 'shield AND carp AND shark') | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | sort - `metrics.size` diff --git a/integ-test/src/test/resources/big5/queries/sort_keyword_can_match_shortcut.ppl b/integ-test/src/test/resources/big5/queries/sort_keyword_can_match_shortcut.ppl index c0ca2fcd093..aab85fb7c1b 100644 --- a/integ-test/src/test/resources/big5/queries/sort_keyword_can_match_shortcut.ppl +++ b/integ-test/src/test/resources/big5/queries/sort_keyword_can_match_shortcut.ppl @@ -1,3 +1,3 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | sort + `@timestamp` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/sort_keyword_no_can_match_shortcut.ppl b/integ-test/src/test/resources/big5/queries/sort_keyword_no_can_match_shortcut.ppl index c0ca2fcd093..aab85fb7c1b 100644 --- a/integ-test/src/test/resources/big5/queries/sort_keyword_no_can_match_shortcut.ppl +++ b/integ-test/src/test/resources/big5/queries/sort_keyword_no_can_match_shortcut.ppl @@ -1,3 +1,3 @@ -source = big5 match(`process.name`, 'kernel') +source = big5 process.name=kernel | sort + `@timestamp` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/sort_numeric_asc_with_match.ppl b/integ-test/src/test/resources/big5/queries/sort_numeric_asc_with_match.ppl index 604a3c28e5c..198667db866 100644 --- a/integ-test/src/test/resources/big5/queries/sort_numeric_asc_with_match.ppl +++ b/integ-test/src/test/resources/big5/queries/sort_numeric_asc_with_match.ppl @@ -1,3 +1,3 @@ -source = big5 match(`log.file.path`, 
'/var/log/messages/solarshark') +source = big5 log.file.path=\"/var/log/messages/solarshark\" | sort + `metrics.size` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/sort_numeric_desc_with_match.ppl b/integ-test/src/test/resources/big5/queries/sort_numeric_desc_with_match.ppl index 607d6689230..f282e9ae67d 100644 --- a/integ-test/src/test/resources/big5/queries/sort_numeric_desc_with_match.ppl +++ b/integ-test/src/test/resources/big5/queries/sort_numeric_desc_with_match.ppl @@ -1,3 +1,3 @@ -source = big5 match(`log.file.path`, '/var/log/messages/solarshark') +source = big5 log.file.path=\"/var/log/messages/solarshark\" | sort - `metrics.size` | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json new file mode 100644 index 00000000000..0c69b0f25e2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$153], severityNumber=[$154], time=[$155], body=[$156])\n LogicalFilter(condition=[query_string(MAP('query', 'ERROR':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'ERROR':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json new file mode 100644 index 00000000000..47ef52587fb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$153], severityNumber=[$154], time=[$155], body=[$156])\n LogicalFilter(condition=[query_string(MAP('query', 'severityNumber:>15':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], 
FILTER->query_string(MAP('query', 'severityNumber:>15':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json new file mode 100644 index 00000000000..18d5ca27c78 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$153], severityNumber=[$154], time=[$155], body=[$156])\n LogicalFilter(condition=[query_string(MAP('query', 'severityText:ERR*':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, 
flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'severityText:ERR*':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json new file mode 100644 index 00000000000..1cdab05e428 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$153], severityNumber=[$154], time=[$155], body=[$156])\n LogicalFilter(condition=[query_string(MAP('query', 'ERROR':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..162=[{inputs}], proj#0..3=[{exprs}], 
severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t153], severityNumber=[$t154], time=[$t155], body=[$t156])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query', 'ERROR':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json new file mode 100644 index 00000000000..4c33e3d38a7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$153], severityNumber=[$154], time=[$155], body=[$156])\n LogicalFilter(condition=[query_string(MAP('query', 'severityNumber:>15':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..162=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], 
attributes=[$t24], droppedAttributesCount=[$t153], severityNumber=[$t154], time=[$t155], body=[$t156])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query', 'severityNumber:>15':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json new file mode 100644 index 00000000000..9c3a54491fa --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$153], severityNumber=[$154], time=[$155], body=[$156])\n LogicalFilter(condition=[query_string(MAP('query', 'severityText:ERR*':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..162=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t153], 
severityNumber=[$t154], time=[$t155], body=[$t156])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query', 'severityText:ERR*':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json new file mode 100644 index 00000000000..55a3524df21 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json @@ -0,0 +1,17 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body]" + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json new file mode 100644 index 00000000000..fc8d456376e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json @@ -0,0 +1,17 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body]" + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json new file mode 100644 index 00000000000..165846b2356 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json @@ -0,0 +1,17 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body]" + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/otellogs_mapping.json b/integ-test/src/test/resources/indexDefinitions/otellogs_mapping.json new file mode 100644 index 00000000000..c78442e2ad8 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/otellogs_mapping.json @@ -0,0 +1,303 @@ +{ + "mappings": { + "dynamic_templates": [ + { + "long_resource_attributes": { + "path_match": "resource.attributes.*", + "match_mapping_type": "long", + "mapping": { + "type": "long" + } + } + }, + { + "double_resource_attributes": { + "path_match": "resource.attributes.*", + "match_mapping_type": "double", + "mapping": { + "type": "double" + } + } + }, + { + "string_resource_attributes": { + "path_match": "resource.attributes.*", + "match_mapping_type": "string", + "mapping": { + "type": "keyword" + } + } + }, + { + "long_attributes": { + "path_match": "attributes.*", + "match_mapping_type": "long", + "mapping": { + "type": "long" + } + } + }, + { + "double_attributes": { + "path_match": "attributes.*", + "match_mapping_type": "double", + "mapping": { + "type": "double" + } + } + }, + { + 
"string_attributes": { + "path_match": "attributes.*", + "match_mapping_type": "string", + "mapping": { + "type": "keyword" + } + } + } + ], + "date_detection": false, + "properties": { + "@timestamp": { + "type": "date_nanos" + }, + "time": { + "type": "date_nanos" + }, + "body": { + "type": "text" + }, + "severityNumber": { + "type": "long" + }, + "severityText": { + "type": "keyword" + }, + "traceId": { + "type": "keyword" + }, + "spanId": { + "type": "keyword" + }, + "flags": { + "type": "long" + }, + "droppedAttributesCount": { + "type": "integer" + }, + "instrumentationScope": { + "properties": { + "name": { + "type": "keyword" + }, + "version": { + "type": "keyword" + }, + "droppedAttributesCount": { + "type": "integer" + } + } + }, + "resource": { + "properties": { + "attributes": { + "properties": { + "service.name": { + "type": "keyword" + }, + "service.namespace": { + "type": "keyword" + }, + "service.version": { + "type": "keyword" + }, + "host.name": { + "type": "keyword" + }, + "container.id": { + "type": "keyword" + }, + "k8s.namespace": { + "type": "keyword" + }, + "k8s.pod.name": { + "type": "keyword" + } + } + }, + "droppedAttributesCount": { + "type": "integer" + } + } + }, + "attributes": { + "properties": { + "user.id": { + "type": "keyword" + }, + "user.email": { + "type": "keyword" + }, + "product.id": { + "type": "keyword" + }, + "order.id": { + "type": "keyword" + }, + "error.code": { + "type": "keyword" + }, + "error.type": { + "type": "keyword" + }, + "error.message": { + "type": "text" + }, + "http.method": { + "type": "keyword" + }, + "http.status_code": { + "type": "long" + }, + "http.url": { + "type": "keyword" + }, + "client.ip": { + "type": "ip", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "db.statement": { + "type": "text" + }, + "db.operation": { + "type": "keyword" + }, + "db.table": { + "type": "keyword" + }, + "email.to": { + "type": "keyword" + }, + "email.subject": { + "type": "text" + }, + "query.raw": { 
+ "type": "text" + }, + "query.type": { + "type": "keyword" + }, + "security.threat": { + "type": "keyword" + }, + "payment.amount": { + "type": "double" + }, + "quantity": { + "type": "long" + }, + "kafka.topic": { + "type": "keyword" + }, + "kafka.partition": { + "type": "long" + }, + "kafka.offset": { + "type": "long" + }, + "redis.command": { + "type": "keyword" + }, + "redis.key": { + "type": "keyword" + }, + "redis.ttl": { + "type": "long" + }, + "jwt.algorithm": { + "type": "keyword" + }, + "jwt.issuer": { + "type": "keyword" + }, + "rate_limit.current": { + "type": "long" + }, + "rate_limit.max": { + "type": "long" + }, + "graphql.operation": { + "type": "keyword" + }, + "graphql.field": { + "type": "keyword" + }, + "webhook.url": { + "type": "keyword" + }, + "webhook.status": { + "type": "keyword" + }, + "mongodb.collection": { + "type": "keyword" + }, + "mongodb.operation": { + "type": "keyword" + }, + "mongodb.filter": { + "type": "text" + }, + "elasticsearch.query": { + "type": "text" + }, + "grpc.method": { + "type": "keyword" + }, + "grpc.status_code": { + "type": "long" + }, + "ssl.domain": { + "type": "keyword" + }, + "ssl.days_until_expiry": { + "type": "long" + }, + "memory.usage_percent": { + "type": "double" + }, + "memory.total_gb": { + "type": "long" + }, + "batch.total_records": { + "type": "long" + }, + "batch.matched_records": { + "type": "long" + }, + "k8s.pod.name": { + "type": "keyword" + }, + "k8s.container.restart_count": { + "type": "long" + }, + "health.status": { + "type": "keyword" + }, + "cors.origin": { + "type": "keyword" + }, + "cors.method": { + "type": "keyword" + }, + "duration_ms": { + "type": "long" + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/otellogs.json b/integ-test/src/test/resources/otellogs.json new file mode 100644 index 00000000000..61576f423c6 --- /dev/null +++ b/integ-test/src/test/resources/otellogs.json @@ -0,0 +1,60 @@ +{"index": {"_id": "1"}} +{"@timestamp": 
"2024-01-15T10:30:00.123456789Z", "time": "2024-01-15T10:30:00.123456789Z", "severityNumber": 9, "severityText": "INFO", "body": "User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart", "traceId": "b3cb01a03c846973fd496b973f49be85", "spanId": "caf311ef949971cb", "flags": 1, "instrumentationScope": {"name": "cart-service", "version": "1.0.0"}, "resource": {"attributes": {"service.name": "cart-service", "service.namespace": "production", "service.version": "2.0.1"}}, "attributes": {"user.id": "e1ce63e6-8501-11f0-930d-c2fcbdc05f14", "product.id": "HQTGWGPNH4", "quantity": 4}} +{"index": {"_id": "2"}} +{"@timestamp": "2024-01-15T10:30:01.234567890Z", "time": "2024-01-15T10:30:01.234567890Z", "severityNumber": 17, "severityText": "ERROR", "body": "Payment failed: Insufficient funds for user@example.com", "traceId": "7475a30207dbef54d29e42c37f09a528", "spanId": "7a35f3b69a2f9a24", "flags": 1, "instrumentationScope": {"name": "payment-service"}, "resource": {"attributes": {"service.name": "payment-service", "service.namespace": "production"}}, "attributes": {"error.code": "INSUFFICIENT_FUNDS", "user.email": "user@example.com", "payment.amount": 1500.00}} +{"index": {"_id": "3"}} +{"@timestamp": "2024-01-15T10:30:02.345678901Z", "time": "2024-01-15T10:30:02.345678901Z", "severityNumber": 13, "severityText": "WARN", "body": "Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT \"exact phrase\" wildcard* fuzzy~2 /regex/ [range TO search]", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "search-service"}}, "attributes": {"query.raw": "+field:value -excluded AND (grouped OR terms) NOT \"exact phrase\" wildcard* fuzzy~2", "query.type": "lucene"}} +{"index": {"_id": "4"}} +{"@timestamp": "2024-01-15T10:30:03.456789012Z", "time": "2024-01-15T10:30:03.456789012Z", "severityNumber": 5, "severityText": "DEBUG", "body": "192.168.1.1 - - [15/Jan/2024:10:30:03 +0000] \"GET 
/api/products?search=laptop&category=electronics HTTP/1.1\" 200 1234 \"-\" \"Mozilla/5.0\"", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "nginx"}}, "attributes": {"http.method": "GET", "http.status_code": 200, "http.url": "/api/products", "client.ip": "192.168.1.1", "http.response_size": 1234}} +{"index": {"_id": "5"}} +{"@timestamp": "2024-01-15T10:30:04.567890123Z", "time": "2024-01-15T10:30:04.567890123Z", "severityNumber": 9, "severityText": "INFO", "body": "Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome! Your order #12345 is confirmed'", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "notification-service"}}, "attributes": {"email.to": "john.doe+newsletter@company.com", "email.subject": "Welcome! Your order #12345 is confirmed", "order.id": "12345"}} +{"index": {"_id": "6"}} +{"@timestamp": "2024-01-15T10:30:05.678901234Z", "time": "2024-01-15T10:30:05.678901234Z", "severityNumber": 21, "severityText": "FATAL", "body": "Database connection pool exhausted: postgresql://db.example.com:5432/production", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "api-gateway", "host.name": "api-server-01"}}, "attributes": {"db.connection_string": "postgresql://db.example.com:5432/production", "db.pool.size": 100, "db.pool.active": 100}} +{"index": {"_id": "7"}} +{"@timestamp": "2024-01-15T10:30:06.789012345Z", "time": "2024-01-15T10:30:06.789012345Z", "severityNumber": 1, "severityText": "TRACE", "body": "Executing SQL: SELECT * FROM users WHERE email LIKE '%@gmail.com' AND status != 'deleted' ORDER BY created_at DESC", "traceId": "abc123def456ghi789", "spanId": "jkl012mno345", "flags": 1, "resource": {"attributes": {"service.name": "user-service"}}, "attributes": {"db.statement": "SELECT * FROM users WHERE email LIKE '%@gmail.com'", "db.operation": "SELECT", "db.table": "users"}} +{"index": {"_id": "8"}} +{"@timestamp": 
"2024-01-15T10:30:07.890123456Z", "time": "2024-01-15T10:30:07.890123456Z", "severityNumber": 17, "severityText": "ERROR", "body": "Failed to parse JSON with special characters: {\"key\": \"value with \\\"quotes\\\" and: $@#%^&*()[]{}<>|\\/?\"}", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "parser-service"}}, "attributes": {"error.type": "JSON_PARSE_ERROR", "json.input": "{\"key\": \"value with \\\"quotes\\\" and: $@#%^&*()[]{}<>|\\/?\"}"}} +{"index": {"_id": "9"}} +{"@timestamp": "2024-01-15T10:30:08.901234567Z", "time": "2024-01-15T10:30:08.901234567Z", "severityNumber": 13, "severityText": "WARN", "body": "Potential SQL injection detected: '; DROP TABLE users; -- in search parameter", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "security-scanner"}}, "attributes": {"security.threat": "SQL_INJECTION", "input.suspicious": "'; DROP TABLE users; --", "request.parameter": "search"}} +{"index": {"_id": "10"}} +{"@timestamp": "2024-01-15T10:30:09.012345678Z", "time": "2024-01-15T10:30:09.012345678Z", "severityNumber": 9, "severityText": "INFO", "body": "[2024-01-15 10:30:09] production.INFO: User authentication successful for admin@company.org using OAuth2", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "auth-service"}}, "attributes": {"user.email": "admin@company.org", "auth.method": "OAuth2", "auth.provider": "google"}} +{"index": {"_id": "11"}} +{"@timestamp": "2024-01-15T10:30:10.123456789Z", "time": "2024-01-15T10:30:10.123456789Z", "severityNumber": 2, "severityText": "TRACE2", "body": "Redis command: SETEX user:session:abc123 3600 {\"user_id\":\"456\",\"email\":\"alice@wonderland.net\"}", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "cache-service"}}, "attributes": {"redis.command": "SETEX", "redis.key": "user:session:abc123", "redis.ttl": 3600, "user.email": "alice@wonderland.net"}} +{"index": {"_id": 
"12"}} +{"@timestamp": "2024-01-15T10:30:11.234567890Z", "time": "2024-01-15T10:30:11.234567890Z", "severityNumber": 18, "severityText": "ERROR2", "body": "Elasticsearch query failed: {\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "search-service"}}, "attributes": {"elasticsearch.query": "{\"query\":{\"bool\":{\"must\":[{\"match\":{\"email\":\"*@example.com\"}}]}}}", "error.message": "Wildcards not allowed at start of term"}} +{"index": {"_id": "13"}} +{"@timestamp": "2024-01-15T10:30:12.345678901Z", "time": "2024-01-15T10:30:12.345678901Z", "severityNumber": 6, "severityText": "DEBUG2", "body": "JWT token validated for user test.user@domain.co.uk", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "auth-service"}}, "attributes": {"jwt.algorithm": "RS256", "jwt.issuer": "auth.example.com", "user.email": "test.user@domain.co.uk"}} +{"index": {"_id": "14"}} +{"@timestamp": "2024-01-15T10:30:13.456789012Z", "time": "2024-01-15T10:30:13.456789012Z", "severityNumber": 10, "severityText": "INFO2", "body": "Kafka message produced to topic 'user-events' with key 'user-123'", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "event-producer"}}, "attributes": {"kafka.topic": "user-events", "kafka.key": "user-123", "kafka.partition": 3, "kafka.offset": 12345}} +{"index": {"_id": "15"}} +{"@timestamp": "2024-01-15T10:30:14.567890123Z", "time": "2024-01-15T10:30:14.567890123Z", "severityNumber": 14, "severityText": "WARN2", "body": "Rate limit approaching: 450/500 requests for API key ending in ...xyz789", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "api-gateway"}}, "attributes": {"rate_limit.current": 450, "rate_limit.max": 500, "api.key_suffix": "xyz789"}} +{"index": {"_id": "16"}} +{"@timestamp": "2024-01-15T10:30:15.678901234Z", "time": 
"2024-01-15T10:30:15.678901234Z", "severityNumber": 3, "severityText": "TRACE3", "body": "GraphQL query: { user(email: \"support@helpdesk.io\") { id name orders { id total } } }", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "graphql-service"}}, "attributes": {"graphql.operation": "query", "graphql.field": "user", "user.email": "support@helpdesk.io"}} +{"index": {"_id": "17"}} +{"@timestamp": "2024-01-15T10:30:16.789012345Z", "time": "2024-01-15T10:30:16.789012345Z", "severityNumber": 19, "severityText": "ERROR3", "body": "Failed to send email to multiple recipients: invalid@, not-an-email, missing@.com, @no-local", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "email-service"}}, "attributes": {"email.invalid_recipients": ["invalid@", "not-an-email", "missing@.com", "@no-local"], "error.count": 4}} +{"index": {"_id": "18"}} +{"@timestamp": "2024-01-15T10:30:17.890123456Z", "time": "2024-01-15T10:30:17.890123456Z", "severityNumber": 11, "severityText": "INFO3", "body": "Webhook delivered to https://api.partner.com/webhook?token=abc123&email=webhook@partner.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "webhook-service"}}, "attributes": {"webhook.url": "https://api.partner.com/webhook", "webhook.status": "delivered", "webhook.recipient_email": "webhook@partner.com"}} +{"index": {"_id": "19"}} +{"@timestamp": "2024-01-15T10:30:18.901234567Z", "time": "2024-01-15T10:30:18.901234567Z", "severityNumber": 7, "severityText": "DEBUG3", "body": "MongoDB query: db.users.find({\"email\": {\"$regex\": \".*@(gmail|yahoo|hotmail)\\.com$\"}})", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "user-service"}}, "attributes": {"mongodb.collection": "users", "mongodb.operation": "find", "mongodb.filter": "{\"email\": {\"$regex\": \".*@(gmail|yahoo|hotmail)\\.com$\"}}"}} +{"index": {"_id": "20"}} +{"@timestamp": 
"2024-01-15T10:30:19.012345678Z", "time": "2024-01-15T10:30:19.012345678Z", "severityNumber": 22, "severityText": "FATAL2", "body": "System shutdown initiated: Out of memory error at UserService.java:142", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "user-service", "host.name": "prod-server-01"}}, "attributes": {"error.type": "C:\\Users\\admin", "error.stacktrace": "java.lang.OutOfMemoryError at UserService.java:142"}} +{"index": {"_id": "21"}} +{"@timestamp": "2024-01-15T10:30:20.123456789Z", "time": "2024-01-15T10:30:20.123456789Z", "severityNumber": 15, "severityText": "WARN3", "body": "SSL certificate expiring in 7 days for domain api.example.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "cert-monitor"}}, "attributes": {"ssl.domain": "api.example.com", "ssl.days_until_expiry": 7, "ssl.issuer": "Let's Encrypt"}} +{"index": {"_id": "22"}} +{"@timestamp": "2024-01-15T10:30:21.234567890Z", "time": "2024-01-15T10:30:21.234567890Z", "severityNumber": 4, "severityText": "TRACE4", "body": "gRPC call: /UserService/GetUserByEmail {\"email\":\"grpc-user@service.net\"} completed in 45ms", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "grpc-service"}}, "attributes": {"grpc.method": "/UserService/GetUserByEmail", "grpc.status_code": 0, "grpc.request.email": "grpc-user@service.net", "duration_ms": 45}} +{"index": {"_id": "23"}} +{"@timestamp": "2024-01-15T10:30:22.345678901Z", "time": "2024-01-15T10:30:22.345678901Z", "severityNumber": 20, "severityText": "ERROR4", "body": "Failed to process message from queue: Invalid JSON in message body containing email notifications@queue.system", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "queue-processor"}}, "attributes": {"queue.name": "email-notifications", "message.id": "msg-789", "error.email": "notifications@queue.system"}} +{"index": {"_id": "24"}} +{"@timestamp": 
"2024-01-15T10:30:23.456789012Z", "time": "2024-01-15T10:30:23.456789012Z", "severityNumber": 8, "severityText": "DEBUG4", "body": "Regex pattern matched: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$/ against test.email+tag@sub.domain.example.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "validation-service"}}, "attributes": {"regex.pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$", "input.value": "test.email+tag@sub.domain.example.com", "validation.result": true}} +{"index": {"_id": "25"}} +{"@timestamp": "2024-01-15T10:30:24.567890123Z", "time": "2024-01-15T10:30:24.567890123Z", "severityNumber": 12, "severityText": "INFO4", "body": "Batch job completed: Processed 1000 user records, found 50 emails matching pattern *@deprecated-domain.com", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "batch-processor"}}, "attributes": {"batch.total_records": 1000, "batch.matched_records": 50, "batch.email_pattern": "*@deprecated-domain.com"}} +{"index": {"_id": "26"}} +{"@timestamp": "2024-01-15T10:30:25.678901234Z", "time": "2024-01-15T10:30:25.678901234Z", "severityNumber": 16, "severityText": "WARN4", "body": "Memory usage high: 85% used. 
May affect performance for operations involving email@performance.test", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "monitoring-service", "container.id": "abc123def456"}}, "attributes": {"memory.usage_percent": 85, "memory.total_gb": 16, "alert.email": "email@performance.test"}} +{"index": {"_id": "27"}} +{"@timestamp": "2024-01-15T10:30:26.789012345Z", "time": "2024-01-15T10:30:26.789012345Z", "severityNumber": 23, "severityText": "FATAL3", "body": "Kubernetes pod crashed: email-service-7d4b8c6d9f-x2zkq in namespace production", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "k8s-monitor", "k8s.namespace": "production"}}, "attributes": {"k8s.pod.name": "email-service-7d4b8c6d9f-x2zkq", "k8s.container.restart_count": 5}} +{"index": {"_id": "28"}} +{"@timestamp": "2024-01-15T10:30:27.890123456Z", "time": "2024-01-15T10:30:27.890123456Z", "severityNumber": 24, "severityText": "FATAL4", "body": "Data corruption detected in user table: email column contains invalid data for user_id=999", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "data-integrity-service"}}, "attributes": {"corruption.table": "users", "corruption.column": "email", "corruption.user_id": 999}} +{"index": {"_id": "29"}} +{"@timestamp": "2024-01-15T10:30:28.901234567Z", "time": "2024-01-15T10:30:28.901234567Z", "severityNumber": 9, "severityText": "INFO", "body": "Health check passed for all services including email-service@health.monitor", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "health-checker"}}, "attributes": {"health.status": "healthy", "health.services_checked": 15, "monitor.email": "email-service@health.monitor"}} +{"index": {"_id": "30"}} +{"@timestamp": "2024-01-15T10:30:29.012345678Z", "time": "2024-01-15T10:30:29.012345678Z", "severityNumber": 9, "severityText": "INFO", "body": "CORS request from origin https://app.example.com to 
access /api/users/email containing sensitive@data.secure", "traceId": "", "spanId": "", "flags": 0, "resource": {"attributes": {"service.name": "api-gateway"}}, "attributes": {"cors.origin": "https://app.example.com", "cors.method": "POST", "endpoint": "/api/users/email", "data.classification": "sensitive@data.secure"}} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexRules.java index f2bea42e5ab..6d349aa452d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexRules.java @@ -29,6 +29,10 @@ public class OpenSearchIndexRules { private static final ExpandCollationOnProjectExprRule EXPAND_COLLATION_ON_PROJECT_EXPR = ExpandCollationOnProjectExprRule.Config.DEFAULT.toRule(); + // Rule that always pushes down relevance functions regardless of pushdown settings + public static final OpenSearchRelevanceFunctionPushdownRule RELEVANCE_FUNCTION_PUSHDOWN = + OpenSearchRelevanceFunctionPushdownRule.Config.DEFAULT.toRule(); + public static final List OPEN_SEARCH_INDEX_SCAN_RULES = ImmutableList.of( PROJECT_INDEX_SCAN, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java new file mode 100644 index 00000000000..36909772c52 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchRelevanceFunctionPushdownRule.java @@ -0,0 +1,118 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.sql.opensearch.planner.physical; + +import static 
org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET; +import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; + +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlOperator; +import org.immutables.value.Value; +import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; + +/** + * Planner rule that always pushes down filters containing relevance functions (like query_string, + * match, multi_match, etc.) to OpenSearch, regardless of the pushdown settings. This ensures + * relevance functions are always executed by OpenSearch for optimal performance and functionality. + */ +@Value.Enclosing +public class OpenSearchRelevanceFunctionPushdownRule + extends RelRule { + + /** Creates an OpenSearchRelevanceFunctionPushdownRule. 
*/ + protected OpenSearchRelevanceFunctionPushdownRule(Config config) { + super(config); + } + + @Override + public void onMatch(RelOptRuleCall call) { + if (call.rels.length == 2) { + final LogicalFilter filter = call.rel(0); + final CalciteLogicalIndexScan scan = call.rel(1); + + // This rule is only used when pushdown is disabled, + // so we only push down filters that contain relevance functions + if (containsRelevanceFunction(filter.getCondition())) { + apply(call, filter, scan); + } + } else { + throw new AssertionError( + String.format("The length of rels should be 2 but got %s", call.rels.length)); + } + } + + protected void apply(RelOptRuleCall call, Filter filter, CalciteLogicalIndexScan scan) { + AbstractRelNode newRel = scan.pushDownFilter(filter); + if (newRel != null) { + call.transformTo(newRel); + } + } + + /** + * Checks if a RexNode contains any relevance functions. + * + * @param node The RexNode to check + * @return true if the node contains relevance functions, false otherwise + */ + private boolean containsRelevanceFunction(RexNode node) { + RelevanceFunctionVisitor visitor = new RelevanceFunctionVisitor(); + node.accept(visitor); + return visitor.hasRelevanceFunction(); + } + + /** Visitor to detect relevance functions in a RexNode tree. 
*/ + private static class RelevanceFunctionVisitor extends RexVisitorImpl { + private boolean foundRelevanceFunction = false; + + RelevanceFunctionVisitor() { + super(true); + } + + @Override + public Void visitCall(RexCall call) { + SqlOperator operator = call.getOperator(); + String operatorName = operator.getName().toLowerCase(); + + // Check if this is a relevance function + if (SINGLE_FIELD_RELEVANCE_FUNCTION_SET.contains(operatorName) + || MULTI_FIELDS_RELEVANCE_FUNCTION_SET.contains(operatorName)) { + foundRelevanceFunction = true; + return null; // Stop traversing once we find a relevance function + } + + // Continue traversing the tree + return super.visitCall(call); + } + + boolean hasRelevanceFunction() { + return foundRelevanceFunction; + } + } + + /** Rule configuration. */ + @Value.Immutable + public interface Config extends RelRule.Config { + /** Config that matches Filter on CalciteLogicalIndexScan. */ + Config DEFAULT = + ImmutableOpenSearchRelevanceFunctionPushdownRule.Config.builder() + .build() + .withOperandSupplier( + b0 -> + b0.operand(LogicalFilter.class) + .oneInput(b1 -> b1.operand(CalciteLogicalIndexScan.class).noInputs())); + + @Override + default OpenSearchRelevanceFunctionPushdownRule toRule() { + return new OpenSearchRelevanceFunctionPushdownRule(this); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 036c3533617..21ea41fa3af 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -103,9 +103,13 @@ public void register(RelOptPlanner planner) { super.register(planner); planner.addRule(EnumerableIndexScanRule.DEFAULT_CONFIG.toRule()); if 
(osIndex.getSettings().getSettingValue(Settings.Key.CALCITE_PUSHDOWN_ENABLED)) { + // When pushdown is enabled, use normal rules (they handle everything including relevance + // functions) for (RelOptRule rule : OpenSearchIndexRules.OPEN_SEARCH_INDEX_SCAN_RULES) { planner.addRule(rule); } + } else { + planner.addRule(OpenSearchIndexRules.RELEVANCE_FUNCTION_PUSHDOWN); } } diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 87230eedd63..d032437dc80 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -118,9 +118,54 @@ commandName ; searchCommand - : (SEARCH)? (logicalExpression)* fromClause (logicalExpression)* # searchFrom + : (SEARCH)? (searchExpression)* fromClause (searchExpression)* # searchFrom + ; + +searchExpression + : LT_PRTHS searchExpression RT_PRTHS # groupedExpression + | NOT searchExpression # notExpression + | searchExpression OR searchExpression # orExpression + | searchExpression AND searchExpression # andExpression + | searchTerm # termExpression + ; + +searchTerm + : searchFieldComparison # searchComparisonTerm + | searchFieldInList # searchInListTerm + | searchLiteral # searchLiteralTerm + ; + +// Unified search literal for both free text and field comparisons +searchLiteral + : numericLiteral + | booleanLiteral + | ID + | stringLiteral + | searchableKeyWord ; +searchFieldComparison + : fieldExpression searchComparisonOperator searchLiteral # searchFieldCompare + ; + +searchFieldInList + : fieldExpression IN LT_PRTHS searchLiteralList RT_PRTHS # searchFieldInValues + ; + +searchLiteralList + : searchLiteral (COMMA searchLiteral)* # searchLiterals + ; + +searchComparisonOperator + : EQUAL # equals + | NOT_EQUAL # notEquals + | LESS # lessThan + | NOT_GREATER # lessOrEqual + | GREATER # greaterThan + | NOT_LESS # greaterOrEqual + ; + + describeCommand : DESCRIBE tableSourceClause ; @@ -1330,6 +1375,11 @@ wildcard ; keywordsCanBeId + : 
searchableKeyWord + | IN + ; + +searchableKeyWord : D // OD SQL and ODBC special | timespanUnit | SPAN @@ -1342,12 +1392,12 @@ keywordsCanBeId | multiFieldRelevanceFunctionName | commandName | collectionFunctionName + | REGEX | explainMode | REGEXP // commands assist keywords | CASE | ELSE - | IN | ARROW | BETWEEN | EXISTS diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index f9306432bb5..b848db7c551 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -48,7 +48,6 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFieldsExcludeMeta; -import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.EqualTo; @@ -60,6 +59,9 @@ import org.opensearch.sql.ast.expression.PatternMethod; import org.opensearch.sql.ast.expression.PatternMode; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.SearchAnd; +import org.opensearch.sql.ast.expression.SearchExpression; +import org.opensearch.sql.ast.expression.SearchGroup; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -95,6 +97,7 @@ import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; +import org.opensearch.sql.ast.tree.Search; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.SpanBin; import org.opensearch.sql.ast.tree.SubqueryAlias; @@ -160,15 +163,34 @@ public UnresolvedPlan visitSubSearch(OpenSearchPPLParser.SubSearchContext ctx) { /** Search command. 
*/ @Override public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) { - if (ctx.logicalExpression().isEmpty()) { + if (ctx.searchExpression().isEmpty()) { return visitFromClause(ctx.fromClause()); } else { - return new Filter( - ctx.logicalExpression().stream() - .map(this::internalVisitExpression) - .reduce(And::new) - .get()) - .attach(visit(ctx.fromClause())); + // Build search expressions using visitor pattern + List searchExprs = + ctx.searchExpression().stream() + .map(expr -> (SearchExpression) expressionBuilder.visit(expr)) + .toList(); + // Combine multiple expressions with AND + SearchExpression combined; + if (searchExprs.size() == 1) { + combined = searchExprs.getFirst(); + } else { + // before being combined with AND (e.g., "a=1 b=-1" becomes "(a:1) AND (b:-1)") + combined = + searchExprs.stream() + .map(SearchGroup::new) + .map(SearchExpression.class::cast) + .reduce(SearchAnd::new) + .get(); // Safe because we know size > 1 from the if condition + } + + // Convert to query string + String queryString = combined.toQueryString(); + + // Create Search node with relation and query string + Relation relation = (Relation) visitFromClause(ctx.fromClause()); + return new Search(relation, queryString); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index c055a95d55b..79be1ca0e9f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -22,36 +22,7 @@ import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.RuleContext; import org.opensearch.sql.ast.dsl.AstDSL; -import org.opensearch.sql.ast.expression.AggregateFunction; -import org.opensearch.sql.ast.expression.Alias; -import org.opensearch.sql.ast.expression.AllFields; -import org.opensearch.sql.ast.expression.And; -import 
org.opensearch.sql.ast.expression.Argument; -import org.opensearch.sql.ast.expression.Between; -import org.opensearch.sql.ast.expression.Case; -import org.opensearch.sql.ast.expression.Cast; -import org.opensearch.sql.ast.expression.Compare; -import org.opensearch.sql.ast.expression.DataType; -import org.opensearch.sql.ast.expression.EqualTo; -import org.opensearch.sql.ast.expression.Field; -import org.opensearch.sql.ast.expression.Function; -import org.opensearch.sql.ast.expression.In; -import org.opensearch.sql.ast.expression.Interval; -import org.opensearch.sql.ast.expression.IntervalUnit; -import org.opensearch.sql.ast.expression.LambdaFunction; -import org.opensearch.sql.ast.expression.Let; -import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.expression.Not; -import org.opensearch.sql.ast.expression.Or; -import org.opensearch.sql.ast.expression.QualifiedName; -import org.opensearch.sql.ast.expression.RelevanceFieldList; -import org.opensearch.sql.ast.expression.Span; -import org.opensearch.sql.ast.expression.SpanUnit; -import org.opensearch.sql.ast.expression.UnresolvedArgument; -import org.opensearch.sql.ast.expression.UnresolvedExpression; -import org.opensearch.sql.ast.expression.When; -import org.opensearch.sql.ast.expression.WindowFunction; -import org.opensearch.sql.ast.expression.Xor; +import org.opensearch.sql.ast.expression.*; import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; @@ -791,4 +762,116 @@ public UnresolvedExpression visitNumericSpanValue( public UnresolvedExpression visitLogWithBaseSpan(OpenSearchPPLParser.LogWithBaseSpanContext ctx) { return org.opensearch.sql.ast.dsl.AstDSL.stringLiteral(ctx.getText()); } + + // Visitor methods for search expressions + @Override + public SearchExpression visitGroupedExpression(OpenSearchPPLParser.GroupedExpressionContext ctx) { + return 
new SearchGroup((SearchExpression) visit(ctx.searchExpression())); + } + + @Override + public SearchExpression visitNotExpression(OpenSearchPPLParser.NotExpressionContext ctx) { + return new SearchNot((SearchExpression) visit(ctx.searchExpression())); + } + + @Override + public SearchExpression visitAndExpression(OpenSearchPPLParser.AndExpressionContext ctx) { + SearchExpression left = (SearchExpression) visit(ctx.searchExpression(0)); + SearchExpression right = (SearchExpression) visit(ctx.searchExpression(1)); + // Wrap the entire AND expression in parentheses + return new SearchGroup(new SearchAnd(left, right)); + } + + @Override + public SearchExpression visitOrExpression(OpenSearchPPLParser.OrExpressionContext ctx) { + SearchExpression left = (SearchExpression) visit(ctx.searchExpression(0)); + SearchExpression right = (SearchExpression) visit(ctx.searchExpression(1)); + // Wrap the entire OR expression in parentheses + return new SearchGroup(new SearchOr(left, right)); + } + + @Override + public SearchExpression visitTermExpression(OpenSearchPPLParser.TermExpressionContext ctx) { + return (SearchExpression) visit(ctx.searchTerm()); + } + + @Override + public SearchExpression visitSearchLiteralTerm(OpenSearchPPLParser.SearchLiteralTermContext ctx) { + return visitSearchLiteral(ctx.searchLiteral()); + } + + @Override + public SearchExpression visitSearchComparisonTerm( + OpenSearchPPLParser.SearchComparisonTermContext ctx) { + OpenSearchPPLParser.SearchFieldCompareContext fieldComp = + (OpenSearchPPLParser.SearchFieldCompareContext) ctx.searchFieldComparison(); + + Field field = (Field) visit(fieldComp.fieldExpression()); + SearchComparison.Operator op = + visitSearchComparisonOperator(fieldComp.searchComparisonOperator()); + + // Use SearchLiteral directly + SearchLiteral searchLit = visitSearchLiteral(fieldComp.searchLiteral()); + + return new SearchComparison(field, op, searchLit); + } + + @Override + public SearchExpression 
visitSearchInListTerm(OpenSearchPPLParser.SearchInListTermContext ctx) { + OpenSearchPPLParser.SearchFieldInValuesContext fieldIn = + (OpenSearchPPLParser.SearchFieldInValuesContext) ctx.searchFieldInList(); + + Field field = (Field) visit(fieldIn.fieldExpression()); + OpenSearchPPLParser.SearchLiteralsContext valueList = + (OpenSearchPPLParser.SearchLiteralsContext) fieldIn.searchLiteralList(); + List values = + valueList.searchLiteral().stream() + .map(this::visitSearchLiteral) + .collect(Collectors.toList()); + + return new SearchIn(field, values); + } + + // Helper method to determine the comparison operator + private SearchComparison.Operator visitSearchComparisonOperator( + OpenSearchPPLParser.SearchComparisonOperatorContext ctx) { + if (ctx instanceof OpenSearchPPLParser.EqualsContext) { + return SearchComparison.Operator.EQUALS; + } else if (ctx instanceof OpenSearchPPLParser.NotEqualsContext) { + return SearchComparison.Operator.NOT_EQUALS; + } else if (ctx instanceof OpenSearchPPLParser.LessThanContext) { + return SearchComparison.Operator.LESS_THAN; + } else if (ctx instanceof OpenSearchPPLParser.LessOrEqualContext) { + return SearchComparison.Operator.LESS_OR_EQUAL; + } else if (ctx instanceof OpenSearchPPLParser.GreaterThanContext) { + return SearchComparison.Operator.GREATER_THAN; + } else if (ctx instanceof OpenSearchPPLParser.GreaterOrEqualContext) { + return SearchComparison.Operator.GREATER_OR_EQUAL; + } + return SearchComparison.Operator.EQUALS; // Default to equals + } + + @Override + public SearchLiteral visitSearchLiteral(OpenSearchPPLParser.SearchLiteralContext ctx) { + if (ctx.stringLiteral() != null) { + // Use visit method to properly handle escaping + Literal stringLit = (Literal) visit(ctx.stringLiteral()); + String content = (String) stringLit.getValue(); + return new SearchLiteral(new Literal(content, DataType.STRING), content.contains(" ")); + } else if (ctx.numericLiteral() != null) { + Literal numericLiteral = (Literal) 
visit(ctx.numericLiteral()); + return new SearchLiteral(numericLiteral, false); + } else if (ctx.booleanLiteral() != null) { + // Boolean literal + Literal booleanLiteral = (Literal) visit(ctx.booleanLiteral()); + return new SearchLiteral(booleanLiteral, false); + } else if (ctx.ID() != null) { + return new SearchLiteral(new Literal(ctx.ID().getText(), DataType.STRING), false); + } else if (ctx.searchableKeyWord() != null) { + return new SearchLiteral( + new Literal(ctx.searchableKeyWord().getText(), DataType.STRING), false); + } + // Default + return new SearchLiteral(new Literal(ctx.getText(), DataType.STRING), false); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d888d451c10..d5c55d10258 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -79,6 +79,7 @@ import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; +import org.opensearch.sql.ast.tree.Search; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.SpanBin; import org.opensearch.sql.ast.tree.SubqueryAlias; @@ -243,6 +244,14 @@ public String visitTableFunction(TableFunction node, String context) { return StringUtils.format("source=%s(%s)", node.getFunctionName().toString(), arguments); } + @Override + public String visitSearch(Search node, String context) { + String source = node.getChild().get(0).accept(this, context); + String queryString = node.getQueryString(); + String anonymized = queryString.replaceAll(":\\S+", ":" + MASK_LITERAL); + return StringUtils.format("%s %s", source, anonymized); + } + @Override public String visitFilter(Filter node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java index 1715e5cbdd2..881a228a795 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java @@ -117,7 +117,8 @@ public void testFilterQueryWithOr() { @Test public void testFilterQueryWithOr2() { - String ppl = "source=EMP (DEPTNO = 20 or MGR = 30) and SAL > 1000 | fields EMPNO, ENAME"; + String ppl = + "source=EMP | where (DEPTNO = 20 or MGR = 30) and SAL > 1000 | fields EMPNO, ENAME"; RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1])\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExistsSubqueryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExistsSubqueryTest.java index d64c95dc6db..717ad65ce27 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExistsSubqueryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExistsSubqueryTest.java @@ -222,7 +222,7 @@ public void testUncorrelatedNotExistsSubquery() { public void testExistsSubqueryInFilter() { String ppl = """ - source=EMP exists [ + source=EMP | where exists [ source=SALGRADE | where SAL = HISAL ] @@ -255,7 +255,7 @@ public void testExistsSubqueryInFilter() { public void testNotExistsSubqueryInFilter() { String ppl = """ - source=EMP not exists [ + source=EMP | where not exists [ source=SALGRADE | where SAL = HISAL ] diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java index 8521723ff95..99731859e28 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java @@ -113,7 +113,7 @@ public 
void testTwoExpressionsInSubquery() { public void testFilterInSubquery() { String ppl = """ - source=EMP (DEPTNO, ENAME) in [ source=DEPT | fields DEPTNO, DNAME ] + source=EMP | where (DEPTNO, ENAME) in [ source=DEPT | fields DEPTNO, DNAME ] | sort - EMPNO | fields EMPNO, ENAME """; RelNode root = getRelNode(ppl); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a22ed58b222..4f09a3e02ee 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -40,6 +40,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.rareTopN; import static org.opensearch.sql.ast.dsl.AstDSL.relation; import static org.opensearch.sql.ast.dsl.AstDSL.rename; +import static org.opensearch.sql.ast.dsl.AstDSL.search; import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.span; import static org.opensearch.sql.ast.dsl.AstDSL.spath; @@ -106,8 +107,7 @@ public void setup() { @Test public void testSearchCommand() { - assertEqual( - "search source=t a=1", filter(relation("t"), compare("=", field("a"), intLiteral(1)))); + assertEqual("search source=t a=1", search(relation("t"), "a:1")); } @Test @@ -168,20 +168,18 @@ public void testSearchWithPrometheusQueryRangeWithNamedArguments() { @Test public void testSearchCommandString() { - assertEqual( - "search source=t a=\"a\"", - filter(relation("t"), compare("=", field("a"), stringLiteral("a")))); + assertEqual("search source=t a=\"a\"", search(relation("t"), "a:a")); } @Test public void testSearchCommandWithoutSearch() { - assertEqual("source=t a=1", filter(relation("t"), compare("=", field("a"), intLiteral(1)))); + assertEqual( + "source=t | where a=1", filter(relation("t"), compare("=", field("a"), intLiteral(1)))); } @Test public void testSearchCommandWithFilterBeforeSource() { - assertEqual( - "search a=1 
source=t", filter(relation("t"), compare("=", field("a"), intLiteral(1)))); + assertEqual("search a=1 source=t", search(relation("t"), "a:1")); } @Test @@ -579,7 +577,7 @@ public void testEvalCommand() { @Test public void testIndexName() { assertEqual( - "source=`log.2020.04.20.` a=1", + "source=`log.2020.04.20.` | where a=1", filter(relation("log.2020.04.20."), compare("=", field("a"), intLiteral(1)))); assertEqual("describe `log.2020.04.20.`", describe(mappingTable("log.2020.04.20."))); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index c9a3028b826..31678148c9c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -40,6 +40,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.projectWithArg; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; import static org.opensearch.sql.ast.dsl.AstDSL.relation; +import static org.opensearch.sql.ast.dsl.AstDSL.search; import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; @@ -63,40 +64,47 @@ public class AstExpressionBuilderTest extends AstBuilderTest { @Test public void testLogicalNotExpr() { assertEqual( - "source=t not a=1", filter(relation("t"), not(compare("=", field("a"), intLiteral(1))))); + "source=t | where not a=1", + filter(relation("t"), not(compare("=", field("a"), intLiteral(1))))); + assertEqual("source=t not a=1", search(relation("t"), "NOT(a:1)")); } @Test public void testLogicalOrExpr() { assertEqual( - "source=t a=1 or b=2", + "source=t | where a=1 or b=2", filter( relation("t"), or(compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(2))))); + assertEqual("source=t a=1 or b=2", search(relation("t"), 
"(a:1 OR b:2)")); } @Test public void testLogicalAndExpr() { assertEqual( - "source=t a=1 and b=2", + "source=t | where a=1 and b=2", filter( relation("t"), and(compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(2))))); + assertEqual("source=t a=1 and b=2", search(relation("t"), "(a:1 AND b:2)")); } @Test public void testLogicalAndExprWithoutKeywordAnd() { assertEqual( - "source=t a=1 b=2", + "source=t | where a=1 and b=2", filter( relation("t"), and(compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(2))))); + assertEqual("source=t a=1 b=2", search(relation("t"), "(a:1) AND (b:2)")); + assertEqual( + "source=t a=1 b=2 c=2 text", search(relation("t"), "(a:1) AND (b:2) AND (c:2) AND (text)")); } @Test public void testLogicalXorExpr() { assertEqual( - "source=t a=1 xor b=2", + "source=t | where a=1 xor b=2", filter( relation("t"), xor(compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(2))))); @@ -105,7 +113,7 @@ public void testLogicalXorExpr() { @Test public void testLogicalAndOr() { assertEqual( - "source=t a=1 and b=2 and c=3 or d=4", + "source=t | where a=1 and b=2 and c=3 or d=4", filter( relation("t"), or( @@ -115,12 +123,15 @@ public void testLogicalAndOr() { compare("=", field("b"), intLiteral(2))), compare("=", field("c"), intLiteral(3))), compare("=", field("d"), intLiteral(4))))); + assertEqual( + "source=t a=1 and b=2 and c=3 or d=4", + search(relation("t"), "((a:1 AND b:2) AND (c:3 OR d:4))")); } @Test public void testLogicalParenthetic() { assertEqual( - "source=t (a=1 or b=2) and (c=3 or d=4)", + "source=t | where (a=1 or b=2) and (c=3 or d=4)", filter( relation("t"), and( @@ -130,12 +141,16 @@ public void testLogicalParenthetic() { or( compare("=", field("c"), intLiteral(3)), compare("=", field("d"), intLiteral(4)))))); + + assertEqual( + "source=t (a=1 or b=2) and (c=3 or d=4)", + search(relation("t"), "(((a:1 OR b:2)) AND ((c:3 OR d:4)))")); } @Test public void 
testLogicalNotAndXorOr() { assertEqual( - "source=t a=1 xor b=2 and not c=3 or d=4", + "source=t | where a=1 xor b=2 and not c=3 or d=4", filter( relation("t"), or( @@ -189,23 +204,27 @@ public void testLikeOperatorCaseInsensitive() { @Test public void testBooleanIsNullFunction() { - assertEqual("source=t isnull(a)", filter(relation("t"), function("is null", field("a")))); - assertEqual("source=t ISNULL(a)", filter(relation("t"), function("is null", field("a")))); + assertEqual( + "source=t | where isnull(a)", filter(relation("t"), function("is null", field("a")))); + assertEqual( + "source=t | where ISNULL(a)", filter(relation("t"), function("is null", field("a")))); } @Test public void testBooleanIsNotNullFunction() { assertEqual( - "source=t isnotnull(a)", filter(relation("t"), function("is not null", field("a")))); + "source=t | where isnotnull(a)", + filter(relation("t"), function("is not null", field("a")))); assertEqual( - "source=t ISNOTNULL(a)", filter(relation("t"), function("is not null", field("a")))); + "source=t | where ISNOTNULL(a)", + filter(relation("t"), function("is not null", field("a")))); } /** Todo. search operator should not include functionCall, need to change antlr. 
*/ @Ignore("search operator should not include functionCall, need to change antlr") public void testEvalExpr() { assertEqual( - "source=t f=abs(a)", + "source=t | where f=abs(a)", filter(relation("t"), equalTo(field("f"), function("abs", field("a"))))); } @@ -388,34 +407,40 @@ public void testBinaryOperationExprPrecedence() { @Test public void testCompareExpr() { assertEqual( - "source=t a='b'", filter(relation("t"), compare("=", field("a"), stringLiteral("b")))); + "source=t | where a='b'", + filter(relation("t"), compare("=", field("a"), stringLiteral("b")))); + assertEqual("source=t a='b'", search(relation("t"), "a:b")); } @Test public void testCompareFieldsExpr() { - assertEqual("source=t a>b", filter(relation("t"), compare(">", field("a"), field("b")))); + assertEqual( + "source=t | where a>b", filter(relation("t"), compare(">", field("a"), field("b")))); + assertEqual("source=t a>b", search(relation("t"), "a:>b")); } @Test public void testDoubleEqualCompareExpr() { // Test that == is correctly mapped to = operator internally - assertEqual("source=t a==1", filter(relation("t"), compare("=", field("a"), intLiteral(1)))); assertEqual( - "source=t a=='hello'", + "source=t | where a==1", filter(relation("t"), compare("=", field("a"), intLiteral(1)))); + assertEqual( + "source=t | where a=='hello'", filter(relation("t"), compare("=", field("a"), stringLiteral("hello")))); - assertEqual("source=t a==b", filter(relation("t"), compare("=", field("a"), field("b")))); + assertEqual( + "source=t | where a==b", filter(relation("t"), compare("=", field("a"), field("b")))); } @Test public void testMixedEqualOperators() { // Test that both = and == can be used in the same expression assertEqual( - "source=t a=1 and b==2", + "source=t | where a=1 and b==2", filter( relation("t"), and(compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(2))))); assertEqual( - "source=t a==1 or b=2", + "source=t | where a==1 or b=2", filter( relation("t"), 
or(compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(2))))); @@ -423,8 +448,10 @@ public void testMixedEqualOperators() { @Test public void testInExpr() { + assertEqual("source=t f in (1, 2, 3)", search(relation("t"), "f:( 1 OR 2 OR 3 )")); + assertEqual( - "source=t f in (1, 2, 3)", + "source=t | where f in (1, 2, 3)", filter(relation("t"), in(field("f"), intLiteral(1), intLiteral(2), intLiteral(3)))); } @@ -880,26 +907,33 @@ public void testNestedFieldNameWithSpecialChars() { @Test public void testStringLiteralExpr() { + assertEqual("source=t a=\"string\"", search(relation("t"), "a:string")); assertEqual( - "source=t a=\"string\"", + "source=t | where a=\"string\"", filter(relation("t"), compare("=", field("a"), stringLiteral("string")))); } @Test public void testIntegerLiteralExpr() { assertEqual( - "source=t a=1 b=-1", + "source=t | where a=1 and b=-1", filter( relation("t"), and( compare("=", field("a"), intLiteral(1)), compare("=", field("b"), intLiteral(-1))))); + + assertEqual("source=t a=1 b=-1", search(relation("t"), "(a:1) AND (b:-1)")); } @Test public void testLongLiteralExpr() { assertEqual( "source=t a=1234567890123 b=-1234567890123", + search(relation("t"), "(a:1234567890123) AND (b:-1234567890123)")); + + assertEqual( + "source=t | where a=1234567890123 and b=-1234567890123", filter( relation("t"), and( @@ -909,32 +943,48 @@ public void testLongLiteralExpr() { @Test public void testDoubleLiteralExpr() { + assertEqual("source=t b=0.1d", search(relation("t"), "b:0.1")); assertEqual( - "source=t b=0.1d", filter(relation("t"), compare("=", field("b"), doubleLiteral(0.1)))); + "source=t | where b=0.1d", + filter(relation("t"), compare("=", field("b"), doubleLiteral(0.1)))); } @Test public void testFloatLiteralExpr() { + assertEqual("source=t b=0.1f", search(relation("t"), "b:0.1")); assertEqual( - "source=t b=0.1f", filter(relation("t"), compare("=", field("b"), floatLiteral(0.1f)))); + "source=t | where b=0.1f", + 
filter(relation("t"), compare("=", field("b"), floatLiteral(0.1f)))); } @Test public void testDecimalLiteralExpr() { + assertEqual("source=t b=0.1", search(relation("t"), "b:0.1")); assertEqual( - "source=t b=0.1", filter(relation("t"), compare("=", field("b"), decimalLiteral(0.1)))); + "source=t | where b=0.1", + filter(relation("t"), compare("=", field("b"), decimalLiteral(0.1)))); } @Test public void testBooleanLiteralExpr() { + assertEqual("source=t a=true", search(relation("t"), "a:true")); + assertEqual( + "source=t | where a=true", + filter(relation("t"), compare("=", field("a"), booleanLiteral(true)))); + } + + @Test + public void testBackQuotedFieldNames() { + assertEqual("source=t `first name`=true", search(relation("t"), "first\\ name:true")); assertEqual( - "source=t a=true", filter(relation("t"), compare("=", field("a"), booleanLiteral(true)))); + "source=t | where `first name`=true", + filter(relation("t"), compare("=", field("first name"), booleanLiteral(true)))); } @Test public void testIntervalLiteralExpr() { assertEqual( - "source=t a = interval 1 day", + "source=t | where a = interval 1 day", filter( relation("t"), compare("=", field("a"), intervalLiteral(1, DataType.INTEGER, "day")))); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstSearchExpressionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstSearchExpressionTest.java new file mode 100644 index 00000000000..e89958c64dd --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstSearchExpressionTest.java @@ -0,0 +1,518 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.parser; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + +import org.junit.Test; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.tree.Relation; +import org.opensearch.sql.ast.tree.Search; +import 
org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; + +public class AstSearchExpressionTest { + + private final PPLSyntaxParser parser = new PPLSyntaxParser(); + private final Settings settings = mock(Settings.class); + + @Test + public void testSimpleSearchTerm() { + String query = "search \"error\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Quotes around a single-word term are dropped; they are kept only for multi-word phrases + assertEquals("error", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testFieldComparison() { + String query = "search status=200 source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("status:200", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testAndExpression() { + String query = "search status=200 AND message=\"success\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("(status:200 AND message:success)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testOrExpression() { + String query = "search error OR warning source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search
search = (Search) plan; + assertEquals("(error OR warning)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testNotExpression() { + String query = "search NOT error source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("NOT(error)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testGroupedExpression() { + String query = "search (error OR warning) AND status=500 source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("(((error OR warning)) AND status:500)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testInList() { + String query = "search status IN (200, 201, 204) source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("status:( 200 OR 201 OR 204 )", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testComparisonOperators() { + String query = "search age>18 source=users"; + Node plan = buildPlan(query); + + 
assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("age:>18", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("users", relation.getTableQualifiedName().toString()); + } + + @Test + public void testNotEqualsOperator() { + String query = "search status!=200 source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // != means field must exist and not equal to value + assertEquals("( _exists_:status AND NOT status:200 )", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testNotEqualsWithAndExpression() { + String query = "search status!=500 AND level=\"INFO\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals( + "(( _exists_:status AND NOT status:500 ) AND level:INFO)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testNotVsNotEquals() { + // Test the difference between NOT field:value and field!=value + + // NOT field:value - returns everything except field="value", including docs where field doesn't + // exist + String query1 = "search NOT status=\"200\" source=logs"; + Node plan1 = buildPlan(query1); + assertTrue(plan1 instanceof Search); + Search search1 = (Search) plan1; + assertEquals("NOT(status:200)", search1.getQueryString()); + + // field!=value - returns only docs where field 
exists AND is not "value" + String query2 = "search status!=\"200\" source=logs"; + Node plan2 = buildPlan(query2); + assertTrue(plan2 instanceof Search); + Search search2 = (Search) plan2; + assertEquals("( _exists_:status AND NOT status:200 )", search2.getQueryString()); + } + + @Test + public void testEscapedSpecialCharacters() { + // Test escaping special characters in values + String query = "search message=\"Error: (1+1)\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Special characters should be escaped + assertEquals("message:\"Error\\: \\(1\\+1\\)\"", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testEscapedWildcardAndQuestion() { + // Test that wildcards are NOT escaped to support pattern matching + String query = "search filename=\"test*.txt?\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // * and ? 
should NOT be escaped to allow wildcard pattern matching + assertEquals("filename:test*.txt?", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testEscapedBooleanOperators() { + // Test escaping && and || in values + String query = "search code=\"a && b || c\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // && and || should be escaped + assertEquals("code:\"a \\&\\& b \\|\\| c\"", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testHyphenInStrings() { + // Test that hyphens in string values are escaped + String query = "search date=\"2024-01-01\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Hyphens in string values should be escaped + assertEquals("date:2024\\-01\\-01", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testNegativeNumbers() { + String query = "search a=1 b=-1 source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Negative numbers should be properly handled + assertEquals("(a:1) AND (b:-1)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) 
search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testMultipleSearchExpressionsCombinedWithAnd() { + // Test that multiple searchExpressions without explicit AND/OR are combined with AND + String query = "search status=200 message=\"success\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Multiple expressions should be combined with AND + assertEquals("(status:200) AND (message:success)", search.getQueryString()); + + // Check the child is a Relation + assertTrue(search.getChild().get(0) instanceof Relation); + Relation relation = (Relation) search.getChild().get(0); + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testMinimalEscapingForURL() { + // URLs should work without escaping colons and slashes + String query = "search url=\"http://api.example.com:8080/path\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Lucene special chars are escaped internally, user doesn't need to escape + assertEquals("url:http\\:\\/\\/api.example.com\\:8080\\/path", search.getQueryString()); + } + + @Test + public void testMinimalEscapingForEmail() { + // Email addresses with + and @ should work without escaping + String query = "search email=\"user+tag@company.com\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("email:user\\+tag@company.com", search.getQueryString()); + } + + @Test + public void testOnlyEscapeQuotesBackslashPipe() { + // Test that only ", \, and | need escaping by user + // Quotes must be escaped + String query1 = "search message=\"She said \\\"Hello\\\"\" source=logs"; + Node plan1 = buildPlan(query1); + Search search1 = (Search) plan1; + assertEquals("message:\"She said \\\"Hello\\\"\"", 
search1.getQueryString()); + + // Backslash must be escaped + String query2 = "search path=\"C:\\\\\\\\Users\\\\\\\\file.txt\" source=logs"; + Node plan2 = buildPlan(query2); + Search search2 = (Search) plan2; + assertEquals("path:C\\:\\\\Users\\\\file.txt", search2.getQueryString()); + + String query3 = "search code=\"a || b\" source=logs"; + Node plan3 = buildPlan(query3); + Search search3 = (Search) plan3; + assertEquals("code:\"a \\|\\| b\"", search3.getQueryString()); + } + + @Test + public void testSpecialCharsNoEscapingNeeded() { + // Test that common special characters work without user escaping + String query = "search log=\"[ERROR] Failed: (reason #1)\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // These chars are escaped for Lucene internally, not by user + assertEquals("log:\"\\[ERROR\\] Failed\\: \\(reason #1\\)\"", search.getQueryString()); + } + + @Test + public void testWildcardsPreserved() { + // Wildcards * and ? 
should not be escaped to allow pattern matching + String query = "search file=\"*.log\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("file:*.log", search.getQueryString()); + + String query2 = "search name=\"user?\" source=logs"; + Node plan2 = buildPlan(query2); + Search search2 = (Search) plan2; + assertEquals("name:user?", search2.getQueryString()); + } + + @Test + public void testBasicIndexSearch() { + String query = "search source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Relation); + Relation relation = (Relation) plan; + assertEquals("logs", relation.getTableQualifiedName().toString()); + } + + @Test + public void testSingleAmpersandAtEndOfString() { + // Test single & at the end of string - covers branch where i+1 >= text.length() + String query = "search message=\"test&\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Single & at end should be escaped + assertEquals("message:test\\&", search.getQueryString()); + } + + @Test + public void testSinglePipeAtEndOfString() { + // Test single | at the end of string - covers branch where i+1 >= text.length() + String query = "search message=\"test|\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Single | at end should be escaped + assertEquals("message:test\\|", search.getQueryString()); + } + + @Test + public void testSingleAmpersandFollowedByDifferentChar() { + // Test single & followed by a different character (not another &) + String query = "search message=\"a&b\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Single & should be escaped + assertEquals("message:a\\&b", search.getQueryString()); + } + + @Test + public void testSinglePipeFollowedByDifferentChar() { + // 
Test single | followed by a different character (not another |) + String query = "search message=\"a|b\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Single | should be escaped + assertEquals("message:a\\|b", search.getQueryString()); + } + + @Test + public void testMixedSingleAndDoubleAmpersands() { + // Test mix of single & and double && + String query = "search message=\"a & b && c\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Single & should be escaped; in a double && each & is escaped individually + assertEquals("message:\"a \\& b \\&\\& c\"", search.getQueryString()); + } + + @Test + public void testMixedSingleAndDoublePipes() { + // Test mix of single | and double || + String query = "search message=\"a | b || c\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Single | should be escaped; in a double || each | is escaped individually + assertEquals("message:\"a \\| b \\|\\| c\"", search.getQueryString()); + } + + @Test + public void testAllLuceneSpecialCharacters() { + // Test all special characters in LUCENE_SPECIAL_CHARS + String query = "search message=\"+test-&|!(){}[]^\\\"~:/\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // All special chars should be escaped except wildcards + assertEquals( + "message:\\+test\\-\\&\\|\\!\\(\\)\\{\\}\\[\\]\\^\\\"\\~\\:\\/", search.getQueryString()); + } + + @Test + public void testNestedFieldAccess() { + // Test nested field access with dots + String query = "search user.name=\"john\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Dots in field names should not be escaped + assertEquals("user.name:john", search.getQueryString()); + } + + @Test + public void
testComplexNestedField() { + // Test complex nested field path + String query = "search request.headers.user_agent=\"Mozilla\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + // Dots in field names should not be escaped + assertEquals("request.headers.user_agent:Mozilla", search.getQueryString()); + } + + @Test + public void testEmptyStringValue() { + // Test empty string value + String query = "search myfield=\"\" source=logs"; + Node plan = buildPlan(query); + + assertTrue(plan instanceof Search); + Search search = (Search) plan; + assertEquals("myfield:", search.getQueryString()); + } + + @Test + public void testSingleCharacterSpecialValues() { + // Test single special characters as values + String query1 = "search myfield=\"&\" source=logs"; + Node plan1 = buildPlan(query1); + Search search1 = (Search) plan1; + assertEquals("myfield:\\&", search1.getQueryString()); + + String query2 = "search myfield=\"|\" source=logs"; + Node plan2 = buildPlan(query2); + Search search2 = (Search) plan2; + assertEquals("myfield:\\|", search2.getQueryString()); + } + + private Node buildPlan(String query) { + AstBuilder astBuilder = new AstBuilder(query, settings); + return astBuilder.visit(parser.parse(query)); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index d4abadbf8bc..c0a14c45cbc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -9,12 +9,7 @@ package org.opensearch.sql.ppl.parser; import static org.junit.Assert.assertEquals; -import static org.opensearch.sql.ast.dsl.AstDSL.compare; -import static org.opensearch.sql.ast.dsl.AstDSL.field; -import static org.opensearch.sql.ast.dsl.AstDSL.filter; -import static 
org.opensearch.sql.ast.dsl.AstDSL.intLiteral; -import static org.opensearch.sql.ast.dsl.AstDSL.project; -import static org.opensearch.sql.ast.dsl.AstDSL.relation; +import static org.opensearch.sql.ast.dsl.AstDSL.*; import static org.opensearch.sql.executor.QueryType.PPL; import org.junit.Rule; @@ -40,17 +35,20 @@ public class AstStatementBuilderTest { @Test public void buildQueryStatement() { assertEqual( - "search source=t a=1", + "search source=t | where a=1", new Query( project(filter(relation("t"), compare("=", field("a"), intLiteral(1))), AllFields.of()), 0, PPL)); + assertEqual( + "search source=t a=1", + new Query(project(search(relation("t"), "a:1"), AllFields.of()), 0, PPL)); } @Test public void buildExplainStatement() { assertExplainEqual( - "search source=t a=1", + "search source=t | where a=1", new Explain( new Query( project( @@ -58,6 +56,9 @@ public void buildExplainStatement() { 0, PPL), PPL)); + assertExplainEqual( + "search source=t a=1", + new Explain(new Query(project(search(relation("t"), "a:1"), AllFields.of()), 0, PPL), PPL)); } private void assertEqual(String query, Statement expectedStatement) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index c805e5a5dfb..b9d91b2e6ae 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -33,7 +33,7 @@ public class PPLQueryDataAnonymizerTest { @Test public void testSearchCommand() { - assertEquals("source=t | where a = ***", anonymize("search source=t a=1")); + assertEquals("source=t a:***", anonymize("search source=t a=1")); } @Test @@ -497,7 +497,8 @@ public void testScalarSubquery() { anonymize("source=t | eval id = [ source=s | stats max(b) ] | fields id")); assertEquals( "source=t | where id > [ source=s | where id = uid | stats max(b) ] | fields + 
id", - anonymize("source=t id > [ source=s | where id = uid | stats max(b) ] | fields id")); + anonymize( + "source=t | where id > [ source=s | where id = uid | stats max(b) ] | fields id")); } @Test