Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -310,18 +310,32 @@ STRCMP: 'STRCMP';
ADDDATE: 'ADDDATE';

// RELEVANCE FUNCTIONS AND PARAMETERS
ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD';
ANALYZE_WILDCARD: 'ANALYZE_WILDCARD';
ANALYZER: 'ANALYZER';
FUZZINESS: 'FUZZINESS';
AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY';
MAX_EXPANSIONS: 'MAX_EXPANSIONS';
PREFIX_LENGTH: 'PREFIX_LENGTH';
BOOST: 'BOOST';
CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY';
ENABLE_POSITION_INCREMENTS: 'ENABLE_POSITION_INCREMENTS';
FIELDS: 'FIELDS';
FLAGS: 'FLAGS';
FUZZINESS: 'FUZZINESS';
FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS';
FUZZY_REWRITE: 'FUZZY_REWRITE';
LENIENT: 'LENIENT';
OPERATOR: 'OPERATOR';
LOW_FREQ_OPERATOR: 'LOW_FREQ_OPERATOR';
MAX_DETERMINIZED_STATES: 'MAX_DETERMINIZED_STATES';
MAX_EXPANSIONS: 'MAX_EXPANSIONS';
MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH';
OPERATOR: 'OPERATOR';
PHRASE_SLOP: 'PHRASE_SLOP';
PREFIX_LENGTH: 'PREFIX_LENGTH';
QUOTE_FIELD_SUFFIX: 'QUOTE_FIELD_SUFFIX';
REWRITE: 'REWRITE';
SLOP: 'SLOP';
TIE_BREAKER: 'TIE_BREAKER';
TIME_ZONE: 'TIME_ZONE';
TYPE: 'TYPE';
ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY';
BOOST: 'BOOST';

// Operators

Expand Down
10 changes: 6 additions & 4 deletions sql/src/main/antlr/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ flowControlFunctionName
;

relevanceFunctionName
: MATCH
: MATCH | MATCH_PHRASE
;

legacyRelevanceFunctionName
Expand All @@ -403,9 +403,11 @@ relevanceArg
;

relevanceArgName
: ANALYZER | FUZZINESS | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY | MAX_EXPANSIONS | PREFIX_LENGTH
| FUZZY_TRANSPOSITIONS | FUZZY_REWRITE | LENIENT | OPERATOR | MINIMUM_SHOULD_MATCH | ZERO_TERMS_QUERY
| BOOST
: ALLOW_LEADING_WILDCARD | ANALYZE_WILDCARD | ANALYZER | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY | BOOST
| CUTOFF_FREQUENCY | ENABLE_POSITION_INCREMENTS | FIELDS | FLAGS | FUZZINESS | FUZZY_TRANSPOSITIONS
| LENIENT | LOW_FREQ_OPERATOR | MAX_DETERMINIZED_STATES | MAX_EXPANSIONS | MINIMUM_SHOULD_MATCH
| OPERATOR | PHRASE_SLOP | PREFIX_LENGTH | QUOTE_FIELD_SUFFIX | REWRITE | SLOP | TIE_BREAKER | TIME_ZONE
| TYPE | ZERO_TERMS_QUERY
;

relevanceArgValue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,14 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.apache.commons.lang3.RandomStringUtils;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.common.antlr.SyntaxCheckException;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

class SQLSyntaxParserTest {

private final SQLSyntaxParser parser = new SQLSyntaxParser();
Expand Down Expand Up @@ -144,4 +149,83 @@ public void canNotParseShowStatementWithoutFilterClause() {
assertThrows(SyntaxCheckException.class, () -> parser.parse("SHOW TABLES"));
}

@Test
public void canParseRelevanceFunctions() {
assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, \"this is a test\")"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, 'this is a test')"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match(`column`, \"this is a test\")"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match(`column`, 'this is a test')"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, 100500)"));

assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, \"this is a test\")"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, 'this is a test')"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(`column`, \"this is a test\")"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(`column`, 'this is a test')"));
assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, 100500)"));
}

private void generateAndTestQuery(String function, HashMap<String, Object[]> functionArgs) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice test generator!

It's pretty complex -- let's make it easier to understand.

In this PR I'll comment on this test specifically.

As a separate task, I'd like to update it to use parametrized tests and to also use it for PPL.

var rand = new Random();

for (int i = 0; i < 100; i++)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test should exercise all combinations that are worth testing.

We can end up with a non-deterministic unit test if there are more than 100 test cases and there's a problem with one of them.

{
StringBuilder query = new StringBuilder();
query.append(String.format("SELECT * FROM test WHERE %s(%s, %s", function,
RandomStringUtils.random(10, true, false),
RandomStringUtils.random(10, true, false)));
var args = new ArrayList<String>();
for (var pair : functionArgs.entrySet())
{
if (rand.nextBoolean())
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this if-else control exactly?

{
var arg = new StringBuilder();
arg.append(rand.nextBoolean() ? "," : ", ");
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either "," or ", " are sufficient. Currently, it is exercising the existing lexer as opposed to the parser changes.

arg.append(rand.nextBoolean() ? pair.getKey().toLowerCase() : pair.getKey().toUpperCase());
arg.append(rand.nextBoolean() ? "=" : " = ");
if (pair.getValue() instanceof String[] || rand.nextBoolean()) {
var quoteSymbol = rand.nextBoolean() ? '\'' : '"';
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's pick either single or double quote and stick to that.

arg.append(quoteSymbol);
arg.append(pair.getValue()[rand.nextInt(pair.getValue().length)]);
arg.append(quoteSymbol);
}
else
arg.append(pair.getValue()[rand.nextInt(pair.getValue().length)]);
args.add(arg.toString());
}
}
Collections.shuffle(args);
for (var arg : args)
query.append(arg);
query.append(rand.nextBoolean() ? ")" : ");");
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd skip switching between ")" and ");".
This is testing area of the parser that we did not affect.

//System.out.printf("%d, %s%n", i, query.toString());
assertNotNull(parser.parse(query.toString()));
}
}

// TODO run all tests and collect exceptions and raise them in the end
@Test
public void canParseRelevanceFunctionsComplexRandomArgs() {
var matchArgs = new HashMap<String, Object[]>();
matchArgs.put("fuzziness", new String[]{ "AUTO", "AUTO:1,5", "1" });
matchArgs.put("fuzzy_transpositions", new Boolean[]{ true, false });
matchArgs.put("operator", new String[]{ "and", "or" });
matchArgs.put("minimum_should_match", new String[]{ "3", "-2", "75%", "-25%", "3<90%", "2<-25% 9<-3" });
matchArgs.put("analyzer", new String[]{ "standard", "stop", "english" });
matchArgs.put("zero_terms_query", new String[]{ "none", "all" });
matchArgs.put("lenient", new Boolean[]{ true, false });
// deprecated
matchArgs.put("cutoff_frequency", new Double[]{ .0, 0.001, 1., 42. });
matchArgs.put("prefix_length", new Integer[]{ 0, 2, 5 });
matchArgs.put("max_expansions", new Integer[]{ 0, 5, 20 });
matchArgs.put("boost", new Double[]{ .5, 1., 2.3 });

generateAndTestQuery("match", matchArgs);

var matchPhraseArgs = new HashMap<String, Object[]>();
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be a good place to use ImmutableMap builder -- https://guava.dev/releases/19.0/api/docs/com/google/common/collect/ImmutableMap.Builder.html

SQL plugin already depends on guava.

matchPhraseArgs.put("analyzer", new String[]{ "standard", "stop", "english" });
matchPhraseArgs.put("max_expansions", new Integer[]{ 0, 5, 20 });
matchPhraseArgs.put("slop", new Integer[]{ 0, 1, 2 });

generateAndTestQuery("match_phrase", matchPhraseArgs);
}
}