diff --git a/geode-core/src/main/antlr/org/apache/geode/cache/query/internal/parse/oql.g b/geode-core/src/main/antlr/org/apache/geode/cache/query/internal/parse/oql.g index cdd1623333e5..5ae8b4e4a79e 100644 --- a/geode-core/src/main/antlr/org/apache/geode/cache/query/internal/parse/oql.g +++ b/geode-core/src/main/antlr/org/apache/geode/cache/query/internal/parse/oql.g @@ -571,11 +571,16 @@ projectionAttributes : projection!{ AST node = null;}: - lb1:identifier TOK_COLON! ( tok1:aggregateExpr{node = #tok1;} | tok2:expr{node = #tok2;}) + // Use syntactic predicate to resolve nondeterminism between aggregateExpr and expr. + // The predicate checks for aggregate function keywords (sum, avg, min, max, count) followed by '('. + // Without this, the parser cannot determine which alternative to choose when it sees these keywords, + // since they can also be used as identifiers in regular expressions. + lb1:identifier TOK_COLON! ( (("sum"|"avg"|"min"|"max"|"count") TOK_LPAREN)=> tok1:aggregateExpr{node = #tok1;} | tok2:expr{node = #tok2;}) { #projection = #([PROJECTION, "projection", "org.apache.geode.cache.query.internal.parse.ASTProjection"], node, #lb1); } | - (tok3:aggregateExpr{node = #tok3;} | tok4:expr{node = #tok4;}) + // Same syntactic predicate as above to handle projections without a label (identifier:) + ((("sum"|"avg"|"min"|"max"|"count") TOK_LPAREN)=> tok3:aggregateExpr{node = #tok3;} | tok4:expr{node = #tok4;}) ( "as" lb2: identifier @@ -958,7 +963,10 @@ collectionExpr : aggregateExpr { int aggFunc = -1; boolean distinctOnly = false; }: !("sum" {aggFunc = SUM;} | "avg" {aggFunc = AVG;} ) - TOK_LPAREN ("distinct"! {distinctOnly = true;} ) ? tokExpr1:expr TOK_RPAREN + // Use greedy option to resolve nondeterminism with optional 'distinct' keyword. + // Greedy tells the parser to match 'distinct' whenever it appears, rather than + // being ambiguous about whether to match it or skip directly to the expression. + TOK_LPAREN (options {greedy=true;}: "distinct"! {distinctOnly = true;} ) ? tokExpr1:expr TOK_RPAREN { #aggregateExpr = #([AGG_FUNC, "aggregate", "org.apache.geode.cache.query.internal.parse.ASTAggregateFunc"], #tokExpr1); ((ASTAggregateFunc)#aggregateExpr).setAggregateFunctionType(aggFunc); @@ -975,8 +983,9 @@ aggregateExpr { int aggFunc = -1; boolean distinctOnly = false; }: | "count"^ + // Same greedy option as above for count's optional 'distinct' keyword TOK_LPAREN! ( TOK_STAR - | ("distinct"! {distinctOnly = true;} ) ? expr ) TOK_RPAREN! + | (options {greedy=true;}: "distinct"! {distinctOnly = true;} ) ? expr ) TOK_RPAREN! { ((ASTAggregateFunc)#aggregateExpr).setAggregateFunctionType(COUNT); #aggregateExpr.setText("aggregate"); diff --git a/geode-core/src/test/java/org/apache/geode/cache/query/internal/AbstractCompiledValueTestJUnitTest.java b/geode-core/src/test/java/org/apache/geode/cache/query/internal/AbstractCompiledValueTestJUnitTest.java index 8f1e4f4d28ca..b996f872b11f 100644 --- a/geode-core/src/test/java/org/apache/geode/cache/query/internal/AbstractCompiledValueTestJUnitTest.java +++ b/geode-core/src/test/java/org/apache/geode/cache/query/internal/AbstractCompiledValueTestJUnitTest.java @@ -24,6 +24,7 @@ import org.junit.Test; import org.junit.runner.RunWith; +import org.apache.geode.cache.query.internal.parse.OQLLexerTokenTypes; import org.apache.geode.cache.query.internal.types.CollectionTypeImpl; import org.apache.geode.test.junit.runners.GeodeParamsRunner; @@ -47,7 +48,13 @@ private CompiledValue[] getCompiledValuesWhichDoNotImplementGetReceiver() { new LinkedHashMap<>()), new CompiledIn(compiledValue1, compiledValue2), new CompiledIteratorDef("test", new CollectionTypeImpl(), compiledValue1), - new CompiledJunction(new CompiledValue[] {compiledValue1, compiledValue2}, 89), + // Changed from hardcoded value 89 to OQLLexerTokenTypes.LITERAL_or constant. + // The hardcoded value 89 was the token number for LITERAL_or in the original grammar, + // but after adding syntactic predicates to fix nondeterminism warnings, the token + // numbering changed (LITERAL_or is now 94). Using the constant ensures this test + // remains correct regardless of future grammar changes. + new CompiledJunction(new CompiledValue[] {compiledValue1, compiledValue2}, + OQLLexerTokenTypes.LITERAL_or), new CompiledLike(compiledValue1, compiledValue2), new CompiledLiteral(compiledValue1), new CompiledMod(compiledValue1, compiledValue2),