Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions doc/lexer-rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,16 @@ As of 4.5, you can also define channel names like enumerations with the followin
```
channels { WSCHANNEL, MYHIDDEN }
```

## Lexer Rule Options

### caseInsensitive
Copy link
Member Author

@KvanTTT KvanTTT Dec 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about the best location for caseInsensitive lexer rule option. Maybe it makes sense to move to https://github.com/antlr/antlr4/blob/master/doc/options.md#rule-options

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe a reference in one spot to the other?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


Defines whether the current lexer rule is case-insensitive.
The argument can be `true` or `false`.
For this rule, the option overrides the value of the grammar-level `caseInsensitive` option, if one is defined.

```g4
options { caseInsensitive=true; }
STRING options { caseInsensitive=false; } : 'N'? '\'' (~'\'' | '\'\'')* '\''; // lower n is not allowed
```
9 changes: 2 additions & 7 deletions doc/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,9 @@ The mechanism works by automatically transforming grammar references to characte

## Rule Options

There are currently no valid rule-level options, but the tool still supports the following syntax for future use:
### caseInsensitive

```
rulename
options {...}
: ...
;
```
The tool supports the `caseInsensitive` lexer rule option, which is described in [lexer-rules.md](lexer-rules.md#caseinsensitive).

## Rule Element Options

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,26 @@ public void testSetUp() throws Exception {
checkLexerMatches(lg, inputString, "TOKEN, EOF");
}

/**
 * A rule-level {@code caseInsensitive=true} option, with no grammar-level option present,
 * should let the lowercase set {@code [a-f]} match the uppercase input {@code ABCDEF}.
 */
@Test
public void testCaseInsensitiveInLexerRule() throws Exception {
    final String grammarText =
        "lexer grammar L;\n" +
        "TOKEN1 options { caseInsensitive=true; } : [a-f]+;\n" +
        "WS: [ ]+ -> skip;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // The whole uppercase input is expected to come back as a single TOKEN1 followed by EOF.
    checkLexerMatches(lexerGrammar, "ABCDEF", "TOKEN1, EOF");
}

/**
 * A rule-level {@code caseInsensitive=false} should win over a grammar-level
 * {@code caseInsensitive=true}: the {@code STRING} rule only accepts an uppercase
 * {@code N} prefix, so a lowercase {@code n} must be reported as a recognition error.
 */
@Test
public void testCaseInsensitiveInLexerRuleOverridesGlobalValue() {
    final String expectedErrors = "line 1:0 token recognition error at: 'n'\n";
    final String lexerSource =
        "lexer grammar L;\n" +
        "options { caseInsensitive=true; }\n" +
        "STRING options { caseInsensitive=false; } : 'N'? '\\'' (~'\\'' | '\\'\\'')* '\\'';\n";

    // Run the lexer first; the errors are read back only afterwards.
    execLexer("L.g4", lexerSource, "L", "n'sample'");

    assertEquals(expectedErrors, getParseErrors());
}

protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
ATN atn = createATN(lg, true);
CharStream input = CharStreams.fromString(inputString);
Expand Down
38 changes: 35 additions & 3 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,45 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {
testErrors(test, false);
}

@Test public void testIllegalModeOption() {
@Test public void testIllegalCaseInsensitiveOptionValue() {
String[] test = {
"lexer grammar L;\n" +
"options { caseInsensitive = badValue; }\n" +
"DEFAULT_TOKEN: [A-F]+;\n",
"TOKEN_1 options { caseInsensitive = badValue; } : [A-F]+;\n",

"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n"
"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n" +
"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:3:36: unsupported option value caseInsensitive=badValue\n"
};

testErrors(test, false);
}

/**
 * A rule-level {@code caseInsensitive} option whose value equals the grammar-level value
 * should produce a REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION warning.
 * Both the {@code true}/{@code true} and {@code false}/{@code false} combinations are checked.
 */
@Test
public void testRedundantCaseInsensitiveLexerRuleOption() {
    // Each array is a pair: the grammar text, then the expected diagnostic output.
    final String[] bothTrue = {
        "lexer grammar L;\n" +
        "options { caseInsensitive = true; }\n" +
        "TOKEN options { caseInsensitive = true; } : [A-F]+;\n",

        "warning(" + ErrorType.REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION.code + "): L.g4:3:16: caseInsensitive lexer rule option is redundant because its value equals to global value (true)\n"
    };
    testErrors(bothTrue, false);

    final String[] bothFalse = {
        "lexer grammar L;\n" +
        "options { caseInsensitive = false; }\n" +
        "TOKEN options { caseInsensitive = false; } : [A-F]+;\n",

        "warning(" + ErrorType.REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION.code + "): L.g4:3:16: caseInsensitive lexer rule option is redundant because its value equals to global value (false)\n"
    };
    testErrors(bothFalse, false);
}

@Test public void testCaseInsensitiveOptionInParseRule() {
String[] test = {
"grammar G;\n" +
"root options { caseInsensitive=true; } : 'token';",

"warning(" + ErrorType.ILLEGAL_OPTION.code + "): G.g4:2:15: unsupported option caseInsensitive\n"
};

testErrors(test, false);
Expand Down
55 changes: 21 additions & 34 deletions tool/src/org/antlr/v4/automata/LexerATNFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ public class LexerATNFactory extends ParserATNFactory {

private final List<String> ruleCommands = new ArrayList<String>();

private final boolean caseInsensitive;

/**
* Maps from an action index to a {@link LexerAction} object.
*/
Expand All @@ -77,8 +75,6 @@ public LexerATNFactory(LexerGrammar g) {
public LexerATNFactory(LexerGrammar g, CodeGenerator codeGenerator) {
super(g);
// use codegen to get correct language templates for lexer commands
String caseInsensitiveOption = g.getOptionString("caseInsensitive");
caseInsensitive = caseInsensitiveOption != null && caseInsensitiveOption.equals("true");
codegenTemplates = (codeGenerator == null ? CodeGenerator.create(g) : codeGenerator).getTemplates();
}

Expand Down Expand Up @@ -192,51 +188,42 @@ public Handle lexerAltCommands(Handle alt, Handle cmds) {

@Override
public Handle lexerCallCommand(GrammarAST ID, GrammarAST arg) {
LexerAction lexerAction = createLexerAction(ID, arg);
if (lexerAction != null) {
return action(ID, lexerAction);
}

// fall back to standard action generation for the command
ST cmdST = codegenTemplates.getInstanceOf("Lexer" +
CharSupport.capitalize(ID.getText())+
"Command");
if (cmdST == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (cmdST.impl.formalArguments == null || !cmdST.impl.formalArguments.containsKey("arg")) {
g.tool.errMgr.grammarError(ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
return action(cmdST.render());
return lexerCallCommandOrCommand(ID, arg);
}

@Override
public Handle lexerCommand(GrammarAST ID) {
LexerAction lexerAction = createLexerAction(ID, null);
return lexerCallCommandOrCommand(ID, null);
}

private Handle lexerCallCommandOrCommand(GrammarAST ID, GrammarAST arg) {
LexerAction lexerAction = createLexerAction(ID, arg);
if (lexerAction != null) {
return action(ID, lexerAction);
}

// fall back to standard action generation for the command
ST cmdST = codegenTemplates.getInstanceOf("Lexer" +
CharSupport.capitalize(ID.getText()) +
CharSupport.capitalize(ID.getText())+
"Command");
if (cmdST == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (cmdST.impl.formalArguments != null && cmdST.impl.formalArguments.containsKey("arg")) {
g.tool.errMgr.grammarError(ErrorType.MISSING_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText());
boolean callCommand = arg != null;
boolean containsArg = cmdST.impl.formalArguments != null && cmdST.impl.formalArguments.containsKey("arg");
if (callCommand != containsArg) {
ErrorType errorType = callCommand ? ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT : ErrorType.MISSING_LEXER_COMMAND_ARGUMENT;
g.tool.errMgr.grammarError(errorType, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (callCommand) {
cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
}

return action(cmdST.render());
}

Expand Down Expand Up @@ -264,7 +251,7 @@ public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean inver
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) {
checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, null);
checkRangeAndAddToSet(associatedAST, t, set, a, b, currentRule.caseInsensitive, null);
}
}
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
Expand Down Expand Up @@ -553,11 +540,11 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse
}

private void checkCharAndAddToSet(GrammarAST ast, IntervalSet set, int c) {
checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, null);
checkRangeAndAddToSet(ast, ast, set, c, c, currentRule.caseInsensitive, null);
}

private void checkRangeAndAddToSet(GrammarAST mainAst, IntervalSet set, int a, int b) {
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, null);
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, currentRule.caseInsensitive, null);
}

private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, CharactersDataCheckStatus previousStatus) {
Expand Down Expand Up @@ -616,7 +603,7 @@ private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, Gram

private Transition createTransition(ATNState target, int from, int to, CommonTree tree) {
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree, true);
if (caseInsensitive) {
if (currentRule.caseInsensitive) {
if (charactersData.isSingleRange()) {
return CodePointTransitions.createWithCodePointRange(target, from, to);
}
Expand Down
24 changes: 18 additions & 6 deletions tool/src/org/antlr/v4/parse/ANTLRLexer.g
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,26 @@ import org.antlr.v4.runtime.misc.Interval;
* Return token or null if for some reason we can't find the start.
*/
public Token getRuleOrSubruleStartToken() {
if ( tokens==null ) return null;
if (tokens == null) return null;
int i = tokens.index();
int n = tokens.size();
if ( i>=n ) i = n-1; // seems index == n as we lex
while ( i>=0 && i<n) {
int n = tokens.size();
if (i >= n) i = n - 1; // seems index == n as we lex
boolean withinOptionsBlock = false;
while (i >= 0 && i < n) {
int ttype = tokens.get(i).getType();
if ( ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF ) {
return tokens.get(i);
if (withinOptionsBlock) {
// Ignore rule options content
if (ttype == OPTIONS) {
withinOptionsBlock = false;
}
}
else {
if (ttype == RBRACE) {
withinOptionsBlock = true;
}
else if (ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF) {
return tokens.get(i);
}
}
i--;
}
Expand Down
8 changes: 6 additions & 2 deletions tool/src/org/antlr/v4/parse/ANTLRParser.g
Original file line number Diff line number Diff line change
Expand Up @@ -517,9 +517,13 @@ lexerRule
paraphrases.pop();
}
: FRAGMENT?
TOKEN_REF COLON lexerRuleBlock SEMI
TOKEN_REF

optionsSpec?

COLON lexerRuleBlock SEMI
-> ^( RULE<RuleAST> TOKEN_REF
^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock
^(RULEMODIFIERS FRAGMENT)? optionsSpec? lexerRuleBlock
)
;

Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void discoverRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers
List<GrammarAST> actions,
GrammarAST block) { }
public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers,
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers, GrammarAST options,
GrammarAST block) { }
public void finishLexerRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void ruleCatch(GrammarAST arg, ActionAST action) { }
Expand Down Expand Up @@ -525,7 +525,8 @@ lexerRule
: ^( RULE TOKEN_REF
{currentRuleName=$TOKEN_REF.text; currentRuleAST=$RULE;}
(^(RULEMODIFIERS m=FRAGMENT {mods.add($m);}))?
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, (GrammarAST)input.LT(1));}
opts=optionsSpec*
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, $opts.start, (GrammarAST)input.LT(1));}
lexerRuleBlock
{
finishLexerRule((RuleAST)$RULE, $TOKEN_REF, $lexerRuleBlock.start);
Expand Down
Loading