diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4
index ed2d7e81606..430ca3a045b 100644
--- a/sql/src/main/antlr/OpenSearchSQLLexer.g4
+++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4
@@ -310,18 +310,32 @@ STRCMP: 'STRCMP';
 ADDDATE: 'ADDDATE';
 
 // RELEVANCE FUNCTIONS AND PARAMETERS
+ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD';
+ANALYZE_WILDCARD: 'ANALYZE_WILDCARD';
 ANALYZER: 'ANALYZER';
-FUZZINESS: 'FUZZINESS';
 AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY';
-MAX_EXPANSIONS: 'MAX_EXPANSIONS';
-PREFIX_LENGTH: 'PREFIX_LENGTH';
+BOOST: 'BOOST';
+CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY';
+ENABLE_POSITION_INCREMENTS: 'ENABLE_POSITION_INCREMENTS';
+FIELDS: 'FIELDS';
+FLAGS: 'FLAGS';
+FUZZINESS: 'FUZZINESS';
 FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS';
-FUZZY_REWRITE: 'FUZZY_REWRITE';
 LENIENT: 'LENIENT';
-OPERATOR: 'OPERATOR';
+LOW_FREQ_OPERATOR: 'LOW_FREQ_OPERATOR';
+MAX_DETERMINIZED_STATES: 'MAX_DETERMINIZED_STATES';
+MAX_EXPANSIONS: 'MAX_EXPANSIONS';
 MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH';
+OPERATOR: 'OPERATOR';
+PHRASE_SLOP: 'PHRASE_SLOP';
+PREFIX_LENGTH: 'PREFIX_LENGTH';
+QUOTE_FIELD_SUFFIX: 'QUOTE_FIELD_SUFFIX';
+REWRITE: 'REWRITE';
+SLOP: 'SLOP';
+TIE_BREAKER: 'TIE_BREAKER';
+TIME_ZONE: 'TIME_ZONE';
+TYPE: 'TYPE';
 ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY';
-BOOST: 'BOOST';
 
 // Operators
 
diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4
index 0b8f3c5250e..f1fef47f459 100644
--- a/sql/src/main/antlr/OpenSearchSQLParser.g4
+++ b/sql/src/main/antlr/OpenSearchSQLParser.g4
@@ -383,7 +383,7 @@ flowControlFunctionName
     ;
 
 relevanceFunctionName
-    : MATCH
+    : MATCH | MATCH_PHRASE
     ;
 
 legacyRelevanceFunctionName
@@ -403,9 +403,11 @@ relevanceArg
     ;
 
 relevanceArgName
-    : ANALYZER | FUZZINESS | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY | MAX_EXPANSIONS | PREFIX_LENGTH
-    | FUZZY_TRANSPOSITIONS | FUZZY_REWRITE | LENIENT | OPERATOR | MINIMUM_SHOULD_MATCH | ZERO_TERMS_QUERY
-    | BOOST
+    : ALLOW_LEADING_WILDCARD | ANALYZE_WILDCARD | ANALYZER | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY | BOOST
+    | CUTOFF_FREQUENCY | ENABLE_POSITION_INCREMENTS | FIELDS | FLAGS | FUZZINESS | FUZZY_TRANSPOSITIONS
+    | LENIENT | LOW_FREQ_OPERATOR | MAX_DETERMINIZED_STATES | MAX_EXPANSIONS | MINIMUM_SHOULD_MATCH
+    | OPERATOR | PHRASE_SLOP | PREFIX_LENGTH | QUOTE_FIELD_SUFFIX | REWRITE | SLOP | TIE_BREAKER | TIME_ZONE
+    | TYPE | ZERO_TERMS_QUERY
     ;
 
 relevanceArgValue
diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java
index e7cb22e8a2a..0d4bc03d2f6 100644
--- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java
+++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java
@@ -9,6 +9,12 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Random;
+import org.apache.commons.lang3.RandomStringUtils;
 import org.junit.jupiter.api.Test;
 import org.opensearch.sql.common.antlr.SyntaxCheckException;
 
@@ -144,4 +150,96 @@ public void canNotParseShowStatementWithoutFilterClause() {
     assertThrows(SyntaxCheckException.class, () -> parser.parse("SHOW TABLES"));
   }
 
+  @Test
+  public void canParseRelevanceFunctions() {
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, \"this is a test\")"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, 'this is a test')"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match(`column`, \"this is a test\")"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match(`column`, 'this is a test')"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, 100500)"));
+
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, \"this is a test\")"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, 'this is a test')"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(`column`, \"this is a test\")"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(`column`, 'this is a test')"));
+    assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, 100500)"));
+  }
+
+  /**
+   * Parses 100 generated calls of {@code function} and expects every one to be accepted.
+   *
+   * <p>Each query passes two random 10-letter tokens followed by a shuffled random subset of
+   * {@code functionArgs}; argument-name case, spacing, value quoting and a trailing semicolon
+   * are varied at random. The generator is seeded, so a failing query reproduces on every run.
+   *
+   * @param function relevance function name, e.g. {@code match}
+   * @param functionArgs maps an argument name to the sample values it may take; values of a
+   *     {@code String[]} entry are always quoted, other values are quoted at random
+   */
+  private void generateAndTestQuery(String function, HashMap<String, Object[]> functionArgs) {
+    var rand = new Random(0);
+
+    for (int i = 0; i < 100; i++) {
+      StringBuilder query = new StringBuilder();
+      query.append(String.format("SELECT * FROM test WHERE %s(%s, %s", function,
+          RandomStringUtils.random(10, 0, 0, true, false, null, rand),
+          RandomStringUtils.random(10, 0, 0, true, false, null, rand)));
+      var args = new ArrayList<String>();
+      for (var pair : functionArgs.entrySet()) {
+        if (rand.nextBoolean()) {
+          var name = pair.getKey();
+          var arg = new StringBuilder();
+          arg.append(rand.nextBoolean() ? "," : ", ");
+          // Locale.ROOT keeps keywords ASCII under any default locale (e.g. Turkish dotted I).
+          arg.append(rand.nextBoolean()
+              ? name.toLowerCase(Locale.ROOT)
+              : name.toUpperCase(Locale.ROOT));
+          arg.append(rand.nextBoolean() ? "=" : " = ");
+          if (pair.getValue() instanceof String[] || rand.nextBoolean()) {
+            var quoteSymbol = rand.nextBoolean() ? '\'' : '"';
+            arg.append(quoteSymbol);
+            arg.append(pair.getValue()[rand.nextInt(pair.getValue().length)]);
+            arg.append(quoteSymbol);
+          } else {
+            arg.append(pair.getValue()[rand.nextInt(pair.getValue().length)]);
+          }
+          args.add(arg.toString());
+        }
+      }
+      // Shuffle with the seeded generator so argument order is reproducible too.
+      Collections.shuffle(args, rand);
+      for (var arg : args) {
+        query.append(arg);
+      }
+      query.append(rand.nextBoolean() ? ")" : ");");
+      assertNotNull(parser.parse(query.toString()));
+    }
+  }
+
+  // TODO run all tests and collect exceptions and raise them in the end
+  @Test
+  public void canParseRelevanceFunctionsComplexRandomArgs() {
+    var matchArgs = new HashMap<String, Object[]>();
+    matchArgs.put("fuzziness", new String[]{ "AUTO", "AUTO:1,5", "1" });
+    matchArgs.put("fuzzy_transpositions", new Boolean[]{ true, false });
+    matchArgs.put("operator", new String[]{ "and", "or" });
+    matchArgs.put("minimum_should_match", new String[]{ "3", "-2", "75%", "-25%", "3<90%", "2<-25% 9<-3" });
+    matchArgs.put("analyzer", new String[]{ "standard", "stop", "english" });
+    matchArgs.put("zero_terms_query", new String[]{ "none", "all" });
+    matchArgs.put("lenient", new Boolean[]{ true, false });
+    // deprecated
+    matchArgs.put("cutoff_frequency", new Double[]{ .0, 0.001, 1., 42. });
+    matchArgs.put("prefix_length", new Integer[]{ 0, 2, 5 });
+    matchArgs.put("max_expansions", new Integer[]{ 0, 5, 20 });
+    matchArgs.put("boost", new Double[]{ .5, 1., 2.3 });
+
+    generateAndTestQuery("match", matchArgs);
+
+    var matchPhraseArgs = new HashMap<String, Object[]>();
+    matchPhraseArgs.put("analyzer", new String[]{ "standard", "stop", "english" });
+    matchPhraseArgs.put("max_expansions", new Integer[]{ 0, 5, 20 });
+    matchPhraseArgs.put("slop", new Integer[]{ 0, 1, 2 });
+
+    generateAndTestQuery("match_phrase", matchPhraseArgs);
+  }
 }