diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
index 64f3e5a8f0e..6d0543f7bab 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
@@ -677,9 +677,16 @@ private static SqlOperandTypeChecker extractTypeCheckerFromUDF(
     }
 
     void populate() {
-      // register operators for comparison
-      registerOperator(NOTEQUAL, PPLBuiltinOperators.NOT_EQUALS_IP, SqlStdOperatorTable.NOT_EQUALS);
-      registerOperator(EQUAL, PPLBuiltinOperators.EQUALS_IP, SqlStdOperatorTable.EQUALS);
+      // register operators for comparison with wildcard and IP support
+      // Resolution order: IP types first, then all other types
+
+      // IP support (handles both wildcard and exact match for IP types)
+      register(EQUAL, new WildcardAwareIpEquals());
+      register(NOTEQUAL, new WildcardAwareIpNotEquals());
+
+      // General support (handles both wildcard and exact match for all other types)
+      register(EQUAL, new WildcardAwareEqualsFunc());
+      register(NOTEQUAL, new WildcardAwareNotEqualsFunc());
       registerOperator(GREATER, PPLBuiltinOperators.GREATER_IP, SqlStdOperatorTable.GREATER_THAN);
       registerOperator(GTE, PPLBuiltinOperators.GTE_IP, SqlStdOperatorTable.GREATER_THAN_OR_EQUAL);
       registerOperator(LESS, PPLBuiltinOperators.LESS_IP, SqlStdOperatorTable.LESS_THAN);
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareEqualsFunc.java b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareEqualsFunc.java
new file mode 100644
index 00000000000..4d86f66b7de
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareEqualsFunc.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.expression.function;
+
+import java.util.List;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlLibraryOperators;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.opensearch.sql.data.type.ExprCoreType;
+import org.opensearch.sql.expression.function.PPLFuncImpTable.FunctionImp2;
+
+/**
+ * Wildcard-aware equals function that resolves to ILIKE when the right-hand operand is a string
+ * literal containing OpenSearch native wildcards: * (zero or more chars) and ? (exactly one
+ * char). Because ILIKE is used, wildcard matching is case-insensitive.
+ *
+ * <p>Only character operands are wildcard-matched. A numeric, datetime or IP operand compared
+ * with a wildcard literal is cast to VARCHAR and compared for plain equality with the
+ * unconverted pattern text; EQUALS is used there so that no wildcard query is generated.
+ */
+public class WildcardAwareEqualsFunc implements FunctionImp2 {
+
+  /**
+   * Builds the comparison for {@code arg1 = arg2}.
+   *
+   * @param builder Rex builder used to create the resulting call
+   * @param arg1 left-hand operand
+   * @param arg2 right-hand operand; only a character literal is inspected for wildcards
+   * @return ILIKE for string-vs-wildcard, EQUALS on a VARCHAR cast for
+   *     numeric/datetime/IP-vs-wildcard, otherwise standard EQUALS
+   */
+  @Override
+  public RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2) {
+    // Check if the second argument is a string literal with wildcards
+    if (arg2.isA(SqlKind.LITERAL) && SqlTypeFamily.CHARACTER.contains(arg2.getType())) {
+      String value = ((RexLiteral) arg2).getValueAs(String.class);
+      if (value != null && containsWildcards(value)) {
+        if (SqlTypeFamily.CHARACTER.contains(arg1.getType())) {
+          // Direct wildcard matching for string fields; "\\" is the LIKE escape character
+          String convertedValue = convertOpenSearchWildcardsToSql(value);
+          RexNode convertedLiteral = builder.makeLiteral(convertedValue);
+          return builder.makeCall(
+              SqlLibraryOperators.ILIKE, arg1, convertedLiteral, builder.makeLiteral("\\"));
+        } else if (SqlTypeFamily.NUMERIC.contains(arg1.getType())
+            || SqlTypeFamily.DATETIME.contains(arg1.getType())
+            || arg1.getType().getSqlTypeName().getName().equals("IP")) {
+          // For non-string fields, cast to VARCHAR and use regular EQUALS with the unconverted
+          // pattern: EQUALS instead of LIKE/ILIKE prevents wildcard query generation
+          RexNode castToString =
+              builder.makeCast(builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), arg1);
+          return builder.makeCall(SqlStdOperatorTable.EQUALS, castToString, arg2);
+        }
+      }
+    }
+
+    // Fall back to standard equals for non-wildcard cases
+    return builder.makeCall(SqlStdOperatorTable.EQUALS, arg1, arg2);
+  }
+
+  /**
+   * Accepts same-type operand pairs only. Per the design note kept from the original list,
+   * numeric fields should use an explicit cast, e.g. {@code CAST(int_field AS STRING) = "1*"}.
+   */
+  @Override
+  public PPLTypeChecker getTypeChecker() {
+    return PPLTypeChecker.wrapUDT(
+        List.of(
+            List.of(ExprCoreType.BOOLEAN, ExprCoreType.BOOLEAN),
+            List.of(ExprCoreType.INTEGER, ExprCoreType.INTEGER),
+            List.of(ExprCoreType.LONG, ExprCoreType.LONG),
+            List.of(ExprCoreType.FLOAT, ExprCoreType.FLOAT),
+            List.of(ExprCoreType.DOUBLE, ExprCoreType.DOUBLE),
+            List.of(ExprCoreType.STRING, ExprCoreType.STRING),
+            List.of(ExprCoreType.DATE, ExprCoreType.DATE),
+            List.of(ExprCoreType.TIME, ExprCoreType.TIME),
+            List.of(ExprCoreType.TIMESTAMP, ExprCoreType.TIMESTAMP),
+            List.of(ExprCoreType.IP, ExprCoreType.IP)));
+  }
+
+  /** Returns true when {@code value} contains an OpenSearch wildcard ({@code *} or {@code ?}). */
+  protected boolean containsWildcards(String value) {
+    return value.contains("*") || value.contains("?");
+  }
+
+  /**
+   * Converts OpenSearch/Lucene wildcards to a SQL LIKE pattern: * (zero or more chars) -> % and ?
+   * (exactly one char) -> _. The LIKE metacharacters % and _ and the escape character \ are
+   * escaped with a backslash so that they match literally.
+   *
+   * @param value pattern using OpenSearch wildcards
+   * @return the equivalent LIKE pattern, to be used with \ as the escape character
+   */
+  protected String convertOpenSearchWildcardsToSql(String value) {
+    StringBuilder sql = new StringBuilder(value.length() + 8);
+    for (int i = 0; i < value.length(); i++) {
+      char c = value.charAt(i);
+      switch (c) {
+        case '*':
+          sql.append('%');
+          break;
+        case '?':
+          sql.append('_');
+          break;
+        case '%':
+        case '_':
+        case '\\':
+          // LIKE metacharacter or the escape character itself: keep it literal
+          sql.append('\\').append(c);
+          break;
+        default:
+          sql.append(c);
+      }
+    }
+    return sql.toString();
+  }
+}
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareIpEquals.java b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareIpEquals.java
new file mode 100644
index 00000000000..26c17cb4163
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareIpEquals.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.expression.function;
+
+import inet.ipaddr.IPAddressString;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+import org.opensearch.sql.data.type.ExprCoreType;
+import org.opensearch.sql.expression.function.PPLFuncImpTable.FunctionImp2;
+
+/**
+ * Wildcard-aware IPv4 equals function. A wildcard pattern is rewritten into inclusive address
+ * range checks, which is only possible when every octet after the first wildcard octet is
+ * {@code *}:
+ *
+ * <ul>
+ *   <li>192.168.*.* matches 192.168.0.0 to 192.168.255.255
+ *   <li>192.168.1.* matches 192.168.1.0 to 192.168.1.255
+ *   <li>192.168.1?.* matches 192.168.10.0 to 192.168.19.255
+ *   <li>192.168.1.2* matches 192.168.1.2, 192.168.1.20-29 and 192.168.1.200-255
+ * </ul>
+ *
+ * <p>Patterns that cannot be expressed this way (for example 192.*.1.1, or anything that is not
+ * four dot-separated octets) fall back to the standard IP comparison with the pattern unchanged.
+ */
+public class WildcardAwareIpEquals implements FunctionImp2 {
+
+  private static final int OCTET_COUNT = 4;
+  private static final int MAX_OCTET = 255;
+
+  /** An exact octet: one to three decimal digits (the 0-255 bound is checked separately). */
+  private static final Pattern EXACT_OCTET = Pattern.compile("[0-9]{1,3}");
+
+  /** A wildcard octet: decimal digits mixed with {@code *} and {@code ?}. */
+  private static final Pattern WILDCARD_OCTET = Pattern.compile("[0-9*?]+");
+
+  @Override
+  public RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2) {
+    return resolveComparison(builder, arg1, arg2, false);
+  }
+
+  /**
+   * Builds the equals ({@code negate == false}) or not-equals ({@code negate == true}) comparison.
+   * A wildcard string literal compared with an IP operand, in either order, becomes a range
+   * check; everything else resolves to the standard IP comparison operator.
+   *
+   * @param builder Rex builder used to create the resulting call
+   * @param arg1 left-hand operand
+   * @param arg2 right-hand operand
+   * @param negate whether to build the not-equals form
+   * @return the resolved comparison
+   */
+  protected RexNode resolveComparison(
+      RexBuilder builder, RexNode arg1, RexNode arg2, boolean negate) {
+    String pattern = isIpType(arg1) ? wildcardPatternOf(arg2) : null;
+    if (pattern != null) {
+      return createIpRangeCheck(builder, arg1, pattern, negate);
+    }
+
+    // Handle reverse order (literal first)
+    pattern = isIpType(arg2) ? wildcardPatternOf(arg1) : null;
+    if (pattern != null) {
+      return createIpRangeCheck(builder, arg2, pattern, negate);
+    }
+
+    // Fall back to the standard IP comparison
+    return builder.makeCall(
+        negate ? PPLBuiltinOperators.NOT_EQUALS_IP : PPLBuiltinOperators.EQUALS_IP, arg1, arg2);
+  }
+
+  @Override
+  public PPLTypeChecker getTypeChecker() {
+    // Accept IP with IP, or IP with STRING (for wildcard patterns) in either operand order
+    return PPLTypeChecker.wrapUDT(
+        List.of(
+            List.of(ExprCoreType.IP, ExprCoreType.IP),
+            List.of(ExprCoreType.IP, ExprCoreType.STRING),
+            List.of(ExprCoreType.STRING, ExprCoreType.IP)));
+  }
+
+  /** Returns true when the node's type is named {@code IP} or its type string contains "IP". */
+  protected boolean isIpType(RexNode node) {
+    String typeName = node.getType().getSqlTypeName().getName();
+    return "IP".equals(typeName) || node.getType().toString().contains("IP");
+  }
+
+  /** Returns true when {@code value} contains an OpenSearch wildcard ({@code *} or {@code ?}). */
+  protected boolean containsWildcards(String value) {
+    return value.contains("*") || value.contains("?");
+  }
+
+  /** Returns the text of a character literal that contains wildcards, or null otherwise. */
+  private String wildcardPatternOf(RexNode node) {
+    // Only character literals are read as text; other literal kinds are left to the fallback
+    if (!node.isA(SqlKind.LITERAL) || !SqlTypeFamily.CHARACTER.contains(node.getType())) {
+      return null;
+    }
+    String value = ((RexLiteral) node).getValueAs(String.class);
+    return value != null && containsWildcards(value) ? value : null;
+  }
+
+  /**
+   * Creates the range check for an IP wildcard pattern. For example, 192.168.*.* becomes: ip >=
+   * 192.168.0.0 AND ip <= 192.168.255.255. A pattern that yields several ranges becomes an OR of
+   * such checks.
+   *
+   * @param builder Rex builder used to create the resulting call
+   * @param ipField the IP operand to test
+   * @param pattern the wildcard pattern
+   * @param negate whether to wrap the check in NOT (for not-equals)
+   * @return the range check, or the standard IP comparison against {@code pattern} when the
+   *     pattern cannot be expressed as ranges
+   */
+  protected RexNode createIpRangeCheck(
+      RexBuilder builder, RexNode ipField, String pattern, boolean negate) {
+    List<IpRange> ranges;
+    try {
+      ranges = calculateIpRanges(pattern);
+    } catch (IllegalArgumentException ignored) {
+      // Best effort: an unsupported pattern is passed unchanged to the standard comparison
+      return builder.makeCall(
+          negate ? PPLBuiltinOperators.NOT_EQUALS_IP : PPLBuiltinOperators.EQUALS_IP,
+          ipField,
+          builder.makeLiteral(pattern));
+    }
+
+    List<RexNode> checks = new ArrayList<>(ranges.size());
+    for (IpRange range : ranges) {
+      RexNode gteCheck =
+          builder.makeCall(PPLBuiltinOperators.GTE_IP, ipField, builder.makeLiteral(range.min));
+      RexNode lteCheck =
+          builder.makeCall(PPLBuiltinOperators.LTE_IP, ipField, builder.makeLiteral(range.max));
+      checks.add(builder.makeCall(SqlStdOperatorTable.AND, gteCheck, lteCheck));
+    }
+    RexNode rangeCheck =
+        checks.size() == 1 ? checks.get(0) : builder.makeCall(SqlStdOperatorTable.OR, checks);
+
+    // Negate if needed (for NOT_EQUALS)
+    return negate ? builder.makeCall(SqlStdOperatorTable.NOT, rangeCheck) : rangeCheck;
+  }
+
+  /**
+   * Calculates the inclusive IPv4 ranges matched by a wildcard pattern.
+   *
+   * @throws IllegalArgumentException if the pattern is not four octets, contains an invalid
+   *     octet, has a non-{@code *} octet after the first wildcard octet, or matches no address
+   */
+  private static List<IpRange> calculateIpRanges(String pattern) {
+    String[] octets = pattern.split("\\.", -1);
+    if (octets.length != OCTET_COUNT) {
+      throw new IllegalArgumentException("Invalid IP pattern: " + pattern);
+    }
+
+    // Leading exact octets are shared by every resulting range
+    StringBuilder prefix = new StringBuilder();
+    int index = 0;
+    while (index < OCTET_COUNT && EXACT_OCTET.matcher(octets[index]).matches()) {
+      int octet = Integer.parseInt(octets[index]);
+      if (octet > MAX_OCTET) {
+        throw new IllegalArgumentException("Invalid IP pattern: " + pattern);
+      }
+      prefix.append(octet).append('.');
+      index++;
+    }
+    if (index == OCTET_COUNT) {
+      // No wildcard at all: the range is the single address itself
+      String address = prefix.substring(0, prefix.length() - 1);
+      return List.of(new IpRange(address, address));
+    }
+    String prefixText = prefix.toString();
+
+    String wildcardOctet = octets[index];
+    if (!WILDCARD_OCTET.matcher(wildcardOctet).matches()) {
+      throw new IllegalArgumentException("Invalid IP pattern: " + pattern);
+    }
+    // Matching addresses are only contiguous when every following octet is unconstrained
+    for (int i = index + 1; i < OCTET_COUNT; i++) {
+      if (!"*".equals(octets[i])) {
+        throw new IllegalArgumentException("Unsupported IP pattern: " + pattern);
+      }
+    }
+
+    int trailingOctets = OCTET_COUNT - index - 1;
+    String minSuffix = ".0".repeat(trailingOctets);
+    String maxSuffix = ".255".repeat(trailingOctets);
+    Pattern octetRegex =
+        Pattern.compile(wildcardOctet.replace("*", "[0-9]*").replace("?", "[0-9]"));
+
+    // Group the matching octet values into contiguous runs; each run is one address range
+    List<IpRange> ranges = new ArrayList<>();
+    int runStart = -1;
+    for (int value = 0; value <= MAX_OCTET + 1; value++) {
+      boolean matches =
+          value <= MAX_OCTET && octetRegex.matcher(Integer.toString(value)).matches();
+      if (matches && runStart < 0) {
+        runStart = value;
+      } else if (!matches && runStart >= 0) {
+        ranges.add(
+            new IpRange(prefixText + runStart + minSuffix, prefixText + (value - 1) + maxSuffix));
+        runStart = -1;
+      }
+    }
+    if (ranges.isEmpty()) {
+      throw new IllegalArgumentException("IP pattern matches no address: " + pattern);
+    }
+    return ranges;
+  }
+
+  /** Inclusive address range whose bounds are validated on construction. */
+  private static class IpRange {
+    final String min;
+    final String max;
+
+    IpRange(String min, String max) {
+      // Validate IP addresses
+      IPAddressString minAddr = new IPAddressString(min);
+      IPAddressString maxAddr = new IPAddressString(max);
+
+      if (!minAddr.isValid() || !maxAddr.isValid()) {
+        throw new IllegalArgumentException("Invalid IP range: " + min + " - " + max);
+      }
+
+      this.min = min;
+      this.max = max;
+    }
+  }
+}
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareIpNotEquals.java b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareIpNotEquals.java
new file mode 100644
index 00000000000..b5b074b6e7b
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareIpNotEquals.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.expression.function;
+
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+
+/**
+ * Wildcard-aware IP address not-equals function: the negated form of {@link
+ * WildcardAwareIpEquals}.
+ */
+public class WildcardAwareIpNotEquals extends WildcardAwareIpEquals {
+
+  @Override
+  public RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2) {
+    // Same resolution as equals, with range checks negated and NOT_EQUALS_IP as the fallback
+    return resolveComparison(builder, arg1, arg2, true);
+  }
+}
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareNotEqualsFunc.java b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareNotEqualsFunc.java
new file mode 100644
index 00000000000..1951b4c3e20
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/expression/function/WildcardAwareNotEqualsFunc.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.expression.function;
+
+import java.util.List;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlLibraryOperators;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+import org.opensearch.sql.data.type.ExprCoreType;
+import org.opensearch.sql.expression.function.PPLFuncImpTable.FunctionImp2;
+
+/**
+ * Wildcard-aware not-equals function that resolves to NOT ILIKE when both operands are character
+ * typed and the right-hand operand is a string literal containing OpenSearch native wildcards: *
+ * (zero or more chars) and ? (exactly one char). Every other case resolves to standard
+ * NOT_EQUALS; non-string operands are not cast here.
+ */
+public class WildcardAwareNotEqualsFunc implements FunctionImp2 {
+
+  /**
+   * Builds the comparison for {@code arg1 != arg2}.
+   *
+   * @param builder Rex builder used to create the resulting call
+   * @param arg1 left-hand operand
+   * @param arg2 right-hand operand; only a character literal is inspected for wildcards
+   * @return NOT(ILIKE) for string-vs-wildcard, otherwise standard NOT_EQUALS
+   */
+  @Override
+  public RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2) {
+    // Check if the second argument is a string literal with wildcards
+    if (arg2.isA(SqlKind.LITERAL) && SqlTypeFamily.CHARACTER.contains(arg2.getType())) {
+      String value = ((RexLiteral) arg2).getValueAs(String.class);
+      // ONLY character/string operands are wildcard-matched; for numeric, IP, date etc. users
+      // should use an explicit CAST
+      if (value != null
+          && containsWildcards(value)
+          && SqlTypeFamily.CHARACTER.contains(arg1.getType())) {
+        String convertedValue = convertOpenSearchWildcardsToSql(value);
+        RexNode convertedLiteral = builder.makeLiteral(convertedValue);
+        // NOT ILIKE, with "\\" as the LIKE escape character
+        return builder.makeCall(
+            SqlStdOperatorTable.NOT,
+            builder.makeCall(
+                SqlLibraryOperators.ILIKE, arg1, convertedLiteral, builder.makeLiteral("\\")));
+      }
+    }
+
+    // Fall back to standard not-equals for all other cases
+    return builder.makeCall(SqlStdOperatorTable.NOT_EQUALS, arg1, arg2);
+  }
+
+  /**
+   * Accepts same-type operand pairs only. Per the design note kept from the original list,
+   * numeric fields should use an explicit cast, e.g. {@code CAST(int_field AS STRING) != "1*"}.
+   */
+  @Override
+  public PPLTypeChecker getTypeChecker() {
+    return PPLTypeChecker.wrapUDT(
+        List.of(
+            List.of(ExprCoreType.BOOLEAN, ExprCoreType.BOOLEAN),
+            List.of(ExprCoreType.INTEGER, ExprCoreType.INTEGER),
+            List.of(ExprCoreType.LONG, ExprCoreType.LONG),
+            List.of(ExprCoreType.FLOAT, ExprCoreType.FLOAT),
+            List.of(ExprCoreType.DOUBLE, ExprCoreType.DOUBLE),
+            List.of(ExprCoreType.STRING, ExprCoreType.STRING),
+            List.of(ExprCoreType.DATE, ExprCoreType.DATE),
+            List.of(ExprCoreType.TIME, ExprCoreType.TIME),
+            List.of(ExprCoreType.TIMESTAMP, ExprCoreType.TIMESTAMP),
+            List.of(ExprCoreType.IP, ExprCoreType.IP)));
+  }
+
+  /** Returns true when {@code value} contains an OpenSearch wildcard ({@code *} or {@code ?}). */
+  private boolean containsWildcards(String value) {
+    return value.contains("*") || value.contains("?");
+  }
+
+  /**
+   * Converts OpenSearch/Lucene wildcards to a SQL LIKE pattern: * (zero or more chars) -> % and ?
+   * (exactly one char) -> _. The LIKE metacharacters % and _ and the escape character \ are
+   * escaped with a backslash so that they match literally.
+   *
+   * @param value pattern using OpenSearch wildcards
+   * @return the equivalent LIKE pattern, to be used with \ as the escape character
+   */
+  private String convertOpenSearchWildcardsToSql(String value) {
+    StringBuilder sql = new StringBuilder(value.length() + 8);
+    for (int i = 0; i < value.length(); i++) {
+      char c = value.charAt(i);
+      switch (c) {
+        case '*':
+          sql.append('%');
+          break;
+        case '?':
+          sql.append('_');
+          break;
+        case '%':
+        case '_':
+        case '\\':
+          // LIKE metacharacter or the escape character itself: keep it literal
+          sql.append('\\').append(c);
+          break;
+        default:
+          sql.append(c);
+      }
+    }
+    return sql.toString();
+  }
+}
diff --git a/docs/user/dql/expressions.rst b/docs/user/dql/expressions.rst
index 18a5bdce8f6..b186cb6191a 100644
--- a/docs/user/dql/expressions.rst
+++ b/docs/user/dql/expressions.rst
@@ -128,11 +128,11 @@ Operators
 +----------------+----------------------------------------+
 | <              | Less than operator                     |
 +----------------+----------------------------------------+
-| !=             | Not equal operator                     |
+| !=             | Not equal operator (* and ? wildcards) |
 +----------------+----------------------------------------+
 | <=             | Less than or equal operator            |
 +----------------+----------------------------------------+
-| =              | Equal operator                         |
+| =              | Equal operator (* and ? wildcards)     |
 +----------------+----------------------------------------+
 | LIKE           | Simple Pattern matching                |
 +----------------+----------------------------------------+
@@ -209,6 +209,68 @@ expr LIKE pattern.
The expr is string value, pattern is supports literal text, a | True | True | False | False | +--------------------+------------------+------------------------+----------------------+ +Equals and Not-Equals with Wildcards +------------------------------------ + +Both ``=`` (equal) and ``!=`` (not equal) operators support wildcard pattern matching using OpenSearch native wildcard characters. When a string value contains wildcards, these operations will perform pattern matching instead of exact matching. + +**Important**: In SQL, wildcard patterns MUST be enclosed in quotes (single quotes are standard) because the ``*`` and ``?`` characters are not valid in unquoted literals. + +**Supported wildcards:** + +- ``*`` matches zero or more characters (OpenSearch native) +- ``?`` matches exactly one character (OpenSearch native) +- All other characters are matched literally + +**Field type restrictions:** + +- **Only works properly on keyword fields** - OpenSearch wildcard queries are designed for keyword fields +- **Text fields**: Wildcard queries do NOT work on analyzed text fields. 
You must use the ``.keyword`` subfield (e.g., ``title.keyword = 'test*'``) for proper wildcard matching +- **Numeric, IP, date, boolean fields**: These use script-based matching which is less efficient but functional +- For SQL-style wildcards (``%``, ``_``), use the ``LIKE`` operator instead + +Examples:: + + os> SELECT 'test123' = 'test*', 'hello' = 'hel*', 'exact' = 'exact'; + fetched rows / total rows = 1/1 + +--------------------+------------------+-----------------+ + | 'test123' = 'test*' | 'hello' = 'hel*' | 'exact' = 'exact' | + |--------------------+------------------+-----------------| + | True | True | True | + +--------------------+------------------+-----------------+ + + os> SELECT '4232151232' = '4232*1232', 'no match' = 'test*'; + fetched rows / total rows = 1/1 + +------------------------------+-----------------------+ + | '4232151232' = '4232*1232' | 'no match' = 'test*' | + |------------------------------+-----------------------| + | True | False | + +------------------------------+-----------------------+ + + os> SELECT 'test123' != 'test*', 'hello' != 'xyz*', 'exact' != 'exact'; + fetched rows / total rows = 1/1 + +---------------------+-------------------+-------------------+ + | 'test123' != 'test*' | 'hello' != 'xyz*' | 'exact' != 'exact' | + |---------------------+-------------------+-------------------| + | False | True | False | + +---------------------+-------------------+-------------------+ + + os> SELECT 'test1' = 'test?', 'hello' = 'hel?o', 'test12' = 'test?'; + fetched rows / total rows = 1/1 + +------------------+-------------------+-------------------+ + | 'test1' = 'test?' | 'hello' = 'hel?o' | 'test12' = 'test?' 
| + |------------------+-------------------+-------------------| + | True | True | False | + +------------------+-------------------+-------------------+ + + os> SELECT 'test123' LIKE 'test%', 'hello' LIKE 'hel_o', 'exact' = 'exact'; + fetched rows / total rows = 1/1 + +------------------------+----------------------+-----------------+ + | 'test123' LIKE 'test%' | 'hello' LIKE 'hel_o' | 'exact' = 'exact' | + |------------------------+----------------------+-----------------| + | True | True | True | + +------------------------+----------------------+-----------------+ + NULL value test --------------- diff --git a/docs/user/ppl/cmd/where.rst b/docs/user/ppl/cmd/where.rst index 115bffe7de5..f5b690e96e7 100644 --- a/docs/user/ppl/cmd/where.rst +++ b/docs/user/ppl/cmd/where.rst @@ -36,3 +36,67 @@ PPL query:: | 13 | F | +----------------+--------+ +Example 2: Filter with wildcard patterns +======================================== + +Both equals (``=``) and not-equals (``!=``) operators support wildcard pattern matching using wildcard characters when a string value contains wildcards. + +**Important**: Wildcard patterns MUST be enclosed in quotes (single or double) because the ``*`` and ``?`` characters are not valid in unquoted literals according to PPL grammar. + +**Supported wildcards:** + +- ``*`` matches zero or more characters (OpenSearch native) +- ``?`` matches exactly one character (OpenSearch native) +- For SQL-style wildcards (``%``, ``_``), use the ``LIKE`` operator instead + +**Field type restrictions:** + +- **Only works properly on keyword fields** - OpenSearch wildcard queries are designed for keyword fields +- **Text fields**: Wildcard queries do NOT work on analyzed text fields. 
You must use the ``.keyword`` subfield (e.g., ``firstname.keyword = "Am*"``) for proper wildcard matching +- **Numeric, IP, date, boolean fields**: These use script-based matching which is less efficient but functional + +PPL query:: + + // Correct: Wildcard pattern is quoted + os> source=accounts | where firstname="Am*" | fields firstname, lastname; + fetched rows / total rows = 1/1 + +-----------+----------+ + | firstname | lastname | + |-----------|----------| + | Amber | Duke | + +-----------+----------+ + + // Correct: Numeric field with wildcard pattern quoted (works via script query) + os> source=accounts | where account_number="1*" | fields account_number, firstname; + fetched rows / total rows = 2/2 + +----------------+-----------+ + | account_number | firstname | + |----------------+-----------| + | 1 | Amber | + | 18 | Dale | + +----------------+-----------+ + + os> source=accounts | where firstname!="Am*" | fields firstname, lastname; + fetched rows / total rows = 3/3 + +-----------+----------+ + | firstname | lastname | + |-----------|----------| + | Hattie | Bond | + | Nanette | Bates | + | Dale | Adams | + +-----------+----------+ + +**Common Errors**:: + + // WRONG: Unquoted wildcard pattern - will cause syntax error + os> source=accounts | where account_number=1* + Error: SyntaxCheckException - [] is not a valid term at '1*' + + // WRONG: Unquoted wildcard pattern with text + os> source=accounts | where firstname=Am* + Error: SyntaxCheckException - [] is not a valid term at 'Am*' + + // CORRECT: Always quote wildcard patterns + os> source=accounts | where account_number="1*" + os> source=accounts | where firstname="Am*" + diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WildcardEqualIntegrationIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WildcardEqualIntegrationIT.java new file mode 100644 index 00000000000..dfe630a592a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WildcardEqualIntegrationIT.java @@
-0,0 +1,195 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ppl;
+
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+
+import java.io.IOException;
+import org.json.JSONObject;
+import org.junit.Test;
+import org.opensearch.sql.common.setting.Settings;
+import org.opensearch.sql.legacy.SQLIntegTestCase;
+
+/**
+ * Integration tests for wildcard matching with = and != in PPL (Calcite and pushdown on/off).
+ * NOTE(review): 'test?' expects rows test1/test2 that the 'test*' expectations omit; verify data.
+ */
+public class WildcardEqualIntegrationIT extends PPLIntegTestCase {
+
+  private static final String TEST_INDEX_WILDCARD = "wildcard";
+
+  @Override
+  protected void init() throws Exception {
+    loadIndex(Index.WILDCARD);
+  }
+
+  // ========== Basic Wildcard Tests ==========
+
+  @Test
+  public void test_wildcard_asterisk_keyword_field() throws IOException {
+    String query = "search source=" + TEST_INDEX_WILDCARD + " | where KeywordBody='test*' | fields KeywordBody";
+    JSONObject result = executeQuery(query);
+    verifyDataRows(result,
+        rows("test wildcard"),
+        rows("test wildcard in body"),
+        rows("test123"));
+  }
+
+  @Test
+  public void test_wildcard_question_keyword_field() throws IOException {
+    String query = "search source=" + TEST_INDEX_WILDCARD + " | where KeywordBody='test?' | fields KeywordBody";
+    JSONObject result = executeQuery(query);
+    verifyDataRows(result,
+        rows("test1"),
+        rows("test2"));
+  }
+
+  @Test
+  public void test_wildcard_not_equals_keyword_field() throws IOException {
+    String query = "search source=" + TEST_INDEX_WILDCARD + " | where KeywordBody!='test*' | fields KeywordBody";
+    JSONObject result = executeQuery(query);
+    // Only checks that at least one row is returned; the rows themselves are not verified
+    assertTrue(result.getInt("total") > 0);
+  }
+
+  // ========== Calcite Engine Tests ==========
+
+  @Test
+  public void test_wildcard_with_calcite_enabled() throws IOException {
+    enableCalcite();
+    allowCalciteFallback();
+
+    try {
+      String query = "search source=" + TEST_INDEX_WILDCARD + " | where KeywordBody='test*' | fields KeywordBody";
+      JSONObject result = executeQuery(query);
+      verifyDataRows(result,
+          rows("test wildcard"),
+          rows("test wildcard in body"),
+          rows("test123"));
+    } finally {
+      disableCalcite();
+      disallowCalciteFallback();
+    }
+  }
+
+  @Test
+  public void test_numeric_wildcard_with_calcite_requires_cast() throws IOException {
+    enableCalcite();
+    allowCalciteFallback();
+
+    try {
+      // This should fail with type error - numeric fields can't use wildcards without CAST
+      String query = "search source=" + TEST_INDEX_WILDCARD + " | where count='123*' | fields count";
+      JSONObject result = executeQuery(query);
+
+      // NOTE(review): passes on either an "error" key or zero rows, not specifically a type error
+      assertCondition("Should get error for numeric wildcard without CAST",
+          result.has("error") || result.getInt("total") == 0);
+    } finally {
+      disableCalcite();
+      disallowCalciteFallback();
+    }
+  }
+
+  @Test
+  public void test_explicit_cast_with_wildcard_calcite() throws IOException {
+    enableCalcite();
+    allowCalciteFallback();
+
+    try {
+      // With explicit CAST, numeric wildcards should work
+      String query = "search source=" + TEST_INDEX_WILDCARD + " | where CAST(count AS STRING)='10*' | fields count";
+      JSONObject result = executeQuery(query);
+
+      // NOTE(review): total >= 0 always holds; this only checks that the query does not throw
+      assertCondition("Explicit CAST should allow numeric wildcards", result.getInt("total") >= 0);
+    } finally {
+      disableCalcite();
+      disallowCalciteFallback();
+    }
+  }
+
+  // ========== Pushdown Tests ==========
+
+  @Test
+  public void test_keyword_wildcard_with_pushdown() throws IOException {
+    enableCalcite();
+    enablePushdown();
+
+    try {
+      // Keyword fields should use native wildcardQuery with pushdown
+      String query = "search source=" + TEST_INDEX_WILDCARD + " | where KeywordBody='test*' | fields KeywordBody";
+      JSONObject result = executeQuery(query);
+
+      verifyDataRows(result,
+          rows("test wildcard"),
+          rows("test wildcard in body"),
+          rows("test123"));
+    } finally {
+      disablePushdown();
+      disableCalcite();
+    }
+  }
+
+  @Test
+  public void test_text_field_keyword_subfield_with_pushdown() throws IOException {
+    enableCalcite();
+    enablePushdown();
+
+    try {
+      // Text fields should automatically use .keyword subfield if available
+      String query = "search source=" + TEST_INDEX_WILDCARD + " | where TextBody='test*' | fields TextBody";
+      JSONObject result = executeQuery(query);
+
+      // NOTE(review): total >= 0 always holds; this only checks that the query does not throw
+      assertCondition("Text field with .keyword should support wildcards", result.getInt("total") >= 0);
+    } finally {
+      disablePushdown();
+      disableCalcite();
+    }
+  }
+
+  @Test
+  public void test_wildcard_without_pushdown() throws IOException {
+    enableCalcite();
+    disablePushdown();
+
+    try {
+      // Without pushdown, should fetch all data and filter in-memory
+      String query = "search source=" + TEST_INDEX_WILDCARD + " | where KeywordBody='test*' | fields KeywordBody";
+      JSONObject result = executeQuery(query);
+
+      verifyDataRows(result,
+          rows("test wildcard"),
+          rows("test wildcard in body"),
+          rows("test123"));
+    } finally {
+      enablePushdown(); // Re-enable for other tests (assumes it was enabled before - TODO confirm)
+      disableCalcite();
+    }
+  }
+
+  // ========== Helper Methods ==========
+
+  private void enablePushdown() throws IOException {
+    updateClusterSettings(
+        new SQLIntegTestCase.ClusterSetting(
+            "persistent", Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), "true"));
+  }
+
+  private void disablePushdown() throws IOException {
+    updateClusterSettings(
+        new SQLIntegTestCase.ClusterSetting(
+            "persistent", Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), "false"));
+  }
+
+  private void assertCondition(String message, boolean condition) {
+    if (!condition) {
+      throw new AssertionError(message);
+    }
+  }
+}
\ No newline at end of file
diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/WildcardEqualIntegrationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/WildcardEqualIntegrationIT.java
new file mode 100644
index 00000000000..eabca548686
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/sql/WildcardEqualIntegrationIT.java
@@ -0,0 +1,245 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.sql;
+
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+
+import java.io.IOException;
+import org.json.JSONObject;
+import org.junit.Test;
+import org.opensearch.sql.common.setting.Settings;
+import org.opensearch.sql.legacy.SQLIntegTestCase;
+
+/**
+ * Integration tests for wildcard pattern matching with = and != operators in SQL.
+ * Tests behavior with and without Calcite engine and pushdown optimization.
+ */ +public class WildcardEqualIntegrationIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.WILDCARD); + } + + // ========== Basic Wildcard Tests ========== + + @Test + public void test_sql_wildcard_equal_asterisk() throws IOException { + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody='test*'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in body"), + rows("test123")); + } + + @Test + public void test_sql_wildcard_equal_question() throws IOException { + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody='test?'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test1"), + rows("test2")); + } + + @Test + public void test_sql_wildcard_not_equal() throws IOException { + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody!='test*' LIMIT 5"; + JSONObject result = executeJdbcRequest(query); + // Should return rows that don't match the pattern + assertCondition("Should return non-matching rows", result.getInt("total") > 0); + } + + @Test + public void test_sql_wildcard_complex_pattern() throws IOException { + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody='test*wild*'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in body")); + } + + // ========== Calcite Engine Tests ========== + + @Test + public void test_sql_wildcard_with_calcite() throws IOException { + enableCalcite(); + allowCalciteFallback(); + + try { + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody='test*'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in body"), + rows("test123")); + } finally { + disableCalcite(); + disallowCalciteFallback(); + } + } + + @Test + public void 
test_sql_numeric_wildcard_requires_cast() throws IOException { + enableCalcite(); + allowCalciteFallback(); + + try { + // This should fail - numeric fields can't use wildcards without CAST + String query = "SELECT count FROM wildcard WHERE count='123*'"; + JSONObject result = executeJdbcRequest(query); + + // With our fix, this should return an error about incompatible types + assertCondition("Should get error for numeric wildcard without CAST", + result.has("error") || result.getInt("total") == 0); + } finally { + disableCalcite(); + disallowCalciteFallback(); + } + } + + @Test + public void test_sql_cast_with_wildcard() throws IOException { + enableCalcite(); + allowCalciteFallback(); + + try { + // With explicit CAST, numeric wildcards should work + String query = "SELECT count FROM wildcard WHERE CAST(count AS VARCHAR)='10*'"; + JSONObject result = executeJdbcRequest(query); + + // This should work with the explicit CAST + assertCondition("Explicit CAST should allow numeric wildcards", result.getInt("total") >= 0); + } finally { + disableCalcite(); + disallowCalciteFallback(); + } + } + + // ========== Pushdown Tests ========== + + @Test + public void test_sql_keyword_wildcard_with_pushdown() throws IOException { + enableCalcite(); + enablePushdown(); + + try { + // Keyword fields should use native wildcardQuery with pushdown + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody='test*'"; + JSONObject result = executeJdbcRequest(query); + + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in body"), + rows("test123")); + } finally { + disablePushdown(); + disableCalcite(); + } + } + + @Test + public void test_sql_text_field_with_pushdown() throws IOException { + enableCalcite(); + enablePushdown(); + + try { + // Text fields should automatically use .keyword subfield if available + String query = "SELECT TextBody FROM wildcard WHERE TextBody='test*'"; + JSONObject result = executeJdbcRequest(query); + + // Should work if 
.keyword subfield exists + assertCondition("Text field with .keyword should support wildcards", result.getInt("total") >= 0); + } finally { + disablePushdown(); + disableCalcite(); + } + } + + @Test + public void test_sql_wildcard_without_pushdown() throws IOException { + enableCalcite(); + disablePushdown(); + + try { + // Without pushdown, should fetch all data and filter in-memory + String query = "SELECT KeywordBody FROM wildcard WHERE KeywordBody='test*'"; + JSONObject result = executeJdbcRequest(query); + + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in body"), + rows("test123")); + } finally { + enablePushdown(); // Re-enable for other tests + disableCalcite(); + } + } + + @Test + public void test_sql_ip_field_wildcard() throws IOException { + enableCalcite(); + enablePushdown(); + + try { + // IP fields with wildcards should require CAST + String query = "SELECT ip_field FROM wildcard WHERE ip_field='192.168.*.*'"; + JSONObject result = executeJdbcRequest(query); + + // Should fail without CAST + assertCondition("IP field wildcard should require CAST", + result.has("error") || result.getInt("total") == 0); + } finally { + disablePushdown(); + disableCalcite(); + } + } + + // ========== Helper Methods ========== + + private void enableCalcite() throws IOException { + updateClusterSettings( + new ClusterSetting( + "persistent", Settings.Key.CALCITE_ENGINE_ENABLED.getKeyValue(), "true")); + } + + private void disableCalcite() throws IOException { + updateClusterSettings( + new ClusterSetting( + "persistent", Settings.Key.CALCITE_ENGINE_ENABLED.getKeyValue(), "false")); + } + + private void allowCalciteFallback() throws IOException { + updateClusterSettings( + new ClusterSetting( + "persistent", Settings.Key.CALCITE_FALLBACK_ALLOWED.getKeyValue(), "true")); + } + + private void disallowCalciteFallback() throws IOException { + updateClusterSettings( + new ClusterSetting( + "persistent", 
Settings.Key.CALCITE_FALLBACK_ALLOWED.getKeyValue(), "false")); + } + + private void enablePushdown() throws IOException { + updateClusterSettings( + new ClusterSetting( + "persistent", Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), "true")); + } + + private void disablePushdown() throws IOException { + updateClusterSettings( + new ClusterSetting( + "persistent", Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), "false")); + } + + private void assertCondition(String message, boolean condition) { + if (!condition) { + throw new AssertionError(message); + } + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 58f5b4cf585..b5b6b1d55fe 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -35,6 +35,7 @@ import static org.opensearch.index.query.QueryBuilders.matchQuery; import static org.opensearch.index.query.QueryBuilders.rangeQuery; import static org.opensearch.index.query.QueryBuilders.regexpQuery; +import static org.opensearch.index.query.QueryBuilders.scriptQuery; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.index.query.QueryBuilders.termsQuery; import static org.opensearch.index.query.QueryBuilders.wildcardQuery; @@ -1193,11 +1194,123 @@ public QueryExpression equals(LiteralExpression literal) { .must(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value))) .must(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lte(value))); } else { - builder = termQuery(getFieldReferenceForTermQuery(), value); + // Support wildcards in equals operations - check if value contains wildcards + String stringValue = value.toString(); + if (containsWildcards(stringValue)) { + // First, 
try to get the appropriate field reference for wildcard queries + String wildcardFieldRef = getFieldReferenceForWildcard(); + + // Check if we can use wildcard query (keyword field or text.keyword subfield) + if (isTextOrKeywordField() || !wildcardFieldRef.equals(getFieldReferenceForTermQuery())) { + // Either it's a keyword field, or we found a .keyword subfield for text + builder = wildcardQuery(wildcardFieldRef, stringValue); + } else { + // For non-keyword fields (numeric, IP, etc.), fall back to script query + // This allows wildcard matching on the string representation + String script = String.format( + "doc['%s'].size() > 0 && doc['%s'].value.toString().matches('%s')", + getFieldReference(), + getFieldReference(), + convertWildcardToRegex(stringValue) + ); + builder = scriptQuery(new Script(script)); + } + } else { + // Use regular term query for exact matches + builder = termQuery(getFieldReferenceForTermQuery(), value); + } } return this; } + /** + * Check if the string contains OpenSearch/Lucene native wildcard characters. Only supports * + * (zero or more) and ? (exactly one) - the native OpenSearch wildcards. + */ + private boolean containsWildcards(String value) { + return value.contains("*") || value.contains("?"); + } + + /** + * Check if the field is a keyword field that supports wildcard queries. + * IMPORTANT: OpenSearch wildcard queries ONLY work on keyword fields, NOT text fields. + * Text, IP, numeric, date, and other field types don't support wildcard queries. 
+ */ + private boolean isTextOrKeywordField() { + if (rel == null || rel.getExprType() == null) { + // Type info is not available, so we cannot tell whether a wildcard query is usable + // Be conservative and report false so the caller does not pick the wildcard query on this basis + return false; + } + + ExprType exprType = rel.getExprType(); + + // For text fields, we need to check if we're already using .keyword subfield + String fieldRef = getFieldReferenceForTermQuery(); + if (fieldRef != null && fieldRef.endsWith(".keyword")) { + return true; // Using .keyword subfield, wildcard will work + } + + // Check for OpenSearch keyword type specifically + if (exprType instanceof OpenSearchDataType) { + String typeName = exprType.typeName().toLowerCase(java.util.Locale.ROOT); + // contains() also covers the exact name "keyword"; Locale.ROOT keeps the match locale-independent + return typeName.contains("keyword"); + } + + // For other types, check if it's a keyword-like field (Locale.ROOT: "STRING" must lowercase to "string" in every locale) + String typeName = exprType.typeName().toLowerCase(java.util.Locale.ROOT); + // Note: "string" in OpenSearch usually means keyword, not text + return typeName.equals("keyword") || typeName.equals("string"); + } + + /** + * Get the appropriate field reference for wildcard queries. + * For text fields, automatically use .keyword subfield if it exists. + */ + private String getFieldReferenceForWildcard() { + String fieldRef = getFieldReferenceForTermQuery(); + + // If type info is available and the reference doesn't already end with .keyword + if (rel != null && rel.getExprType() != null && !fieldRef.endsWith(".keyword")) { + // Use the existing OpenSearchTextType utility to check for .keyword subfield + String keywordField = OpenSearchTextType.toKeywordSubField(getFieldReference(), rel.getExprType()); + if (keywordField != null) { + // The utility returned a field reference; use it for the wildcard query + return keywordField; + } + } + + return fieldRef; + } + + /** + * Convert OpenSearch wildcard pattern to Java regex pattern. + * * -> .* (zero or more characters) + * ? -> . (exactly one character) + * Other characters are escaped for regex.
+ */ + private String convertWildcardToRegex(String wildcard) { + // Escape the backslash first (so later escapes are not doubled), then the other regex metacharacters except * and ? + String escaped = wildcard.replace("\\", "\\\\") + .replace(".", "\\.") + .replace("+", "\\+") + .replace("^", "\\^") + .replace("$", "\\$") + .replace("(", "\\(") + .replace(")", "\\)") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("|", "\\|"); + + // Convert wildcards: * becomes .* and ? becomes . + return escaped + .replace("*", ".*") + .replace("?", "."); + } + @Override public QueryExpression notEquals(LiteralExpression literal) { Object value = literal.value(); @@ -1207,11 +1320,38 @@ public QueryExpression notEquals(LiteralExpression literal) { .should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value))) .should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lt(value))); } else { - builder = - boolQuery() - // NOT LIKE should return false when field is NULL - .must(existsQuery(getFieldReference())) - .mustNot(termQuery(getFieldReferenceForTermQuery(), value)); + // Support wildcards in not-equals operations + String stringValue = value.toString(); + if (containsWildcards(stringValue)) { + // First, try to get the appropriate field reference for wildcard queries + String wildcardFieldRef = getFieldReferenceForWildcard(); + + // Check if we can use wildcard query (keyword field or text.keyword subfield) + if (isTextOrKeywordField() || !wildcardFieldRef.equals(getFieldReferenceForTermQuery())) { + // Either it's a keyword field, or we found a .keyword subfield for text + builder = + boolQuery() + // Field must exist + .must(existsQuery(getFieldReference())) + // Must not match the wildcard pattern + .mustNot(wildcardQuery(wildcardFieldRef, stringValue)); + } else { + // For non-keyword fields (numeric, IP, etc.), use script query + String script = String.format( + "doc['%s'].size() > 0 && !doc['%s'].value.toString().matches('%s')", + getFieldReference(), + getFieldReference(), +
convertWildcardToRegex(stringValue) + ); + builder = scriptQuery(new Script(script)); + } + } else { + builder = + boolQuery() + // NOT LIKE should return false when field is NULL + .must(existsQuery(getFieldReference())) + .mustNot(termQuery(getFieldReferenceForTermQuery(), value)); + } } return this; } @@ -1311,8 +1451,30 @@ public QueryExpression notIn(LiteralExpression literal) { @Override public QueryExpression equals(Object point, boolean isTimeStamp) { - builder = - termQuery(getFieldReferenceForTermQuery(), convertEndpointValue(point, isTimeStamp)); + Object value = convertEndpointValue(point, isTimeStamp); + String stringValue = value.toString(); + if (containsWildcards(stringValue)) { + // First, try to get the appropriate field reference for wildcard queries + String wildcardFieldRef = getFieldReferenceForWildcard(); + + // Check if we can use wildcard query (keyword field or text.keyword subfield) + if (isTextOrKeywordField() || !wildcardFieldRef.equals(getFieldReferenceForTermQuery())) { + // Either it's a keyword field, or we found a .keyword subfield for text + builder = wildcardQuery(wildcardFieldRef, stringValue); + } else { + // For non-keyword fields (numeric, IP, etc.), use script query + String script = String.format( + "doc['%s'].size() > 0 && doc['%s'].value.toString().matches('%s')", + getFieldReference(), + getFieldReference(), + convertWildcardToRegex(stringValue) + ); + builder = scriptQuery(new Script(script)); + } + } else { + // Use regular term query for exact matches + builder = termQuery(getFieldReferenceForTermQuery(), value); + } return this; }