Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {

"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars a-f used multiple times in set [aa-f]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars D-J used multiple times in set [A-FD-J]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars O-V used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:38: chars O-V used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n"
};
Expand All @@ -411,16 +411,32 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {
"TOKEN_RANGE_3: 'm'..'q' | [M-Q];\n",

"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars a-f used multiple times in set [a-fA-F0-9]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars A-F used multiple times in set [a-fA-F0-9]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars g-l used multiple times in set 'g'..'l' | 'G'..'L'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars G-L used multiple times in set 'g'..'l' | 'G'..'L'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:32: chars g-l used multiple times in set 'g'..'l' | 'G'..'L'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'M' used multiple times in set 'M'..'Q' | 'm'..'q'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'm' used multiple times in set 'M'..'Q' | 'm'..'q'\n"
};

testErrors(test, false);
}

@Test public void testCaseInsensitiveWithUnicodeRanges() {
String[] test = {
"lexer grammar L;\n" +
"options { caseInsensitive=true; }\n" +
"FullWidthLetter\n" +
" : '\\u00c0'..'\\u00d6' // ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ\n" +
" | '\\u00f8'..'\\u00ff' // øùúûüýþÿ\n" +
" ;",

""
};

// Don't transform øùúûüýþÿ to uppercase
// ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸ
// because of different length of lower and UPPER range
testErrors(test, false);
}

@Test public void testUnreachableTokens() {
String[] test = {
"lexer grammar Test;\n" +
Expand Down Expand Up @@ -473,12 +489,25 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {
"lexer grammar Test;\n" +
"TOKEN1: 'A'..'g';\n" +
"TOKEN2: [C-m];\n" +
"TOKEN3: [А-я]; // OK since range does not contain intermediate characters",
"TOKEN3: [А-я]; // OK since range does not contain intermediate characters\n" +
"TOKEN4: '\\u0100'..'\\u1fff'; // OK since range borders are unicode characters",

"warning(" + ErrorType.RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS.code + "): Test.g4:2:8: Range A..g probably contains not implied characters [\\]^_`. Both bounds should be defined in lower or UPPER case\n" +
"warning(" + ErrorType.RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS.code + "): Test.g4:3:8: Range C..m probably contains not implied characters [\\]^_`. Both bounds should be defined in lower or UPPER case\n"
};

testErrors(test, false);
}

@Test public void testNotImpliedCharactersWithCaseInsensitiveOption() {
String[] test = {
"lexer grammar Test;\n" +
"options { caseInsensitive=true; }\n" +
"TOKEN: [A-z];",

"warning(" + ErrorType.RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS.code + "): Test.g4:3:7: Range A..z probably contains not implied characters [\\]^_`. Both bounds should be defined in lower or UPPER case\n"
};

testErrors(test, false);
}
}
11 changes: 11 additions & 0 deletions tool/src/org/antlr/v4/automata/CharactersDataCheckStatus.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package org.antlr.v4.automata;

public class CharactersDataCheckStatus {
public final boolean collision;
public final boolean notImpliedCharacters;

public CharactersDataCheckStatus(boolean collision, boolean notImpliedCharacters) {
this.collision = collision;
this.notImpliedCharacters = notImpliedCharacters;
}
}
85 changes: 48 additions & 37 deletions tool/src/org/antlr/v4/automata/LexerATNFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean inver
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) {
checkRangeAndAddToSet(associatedAST, set, a, b, caseInsensitive);
checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, null);
}
}
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
Expand Down Expand Up @@ -517,7 +517,7 @@ private CharSetParseState applyPrevStateAndMoveToCodePoint(
charSetAST.getToken(),
CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint));
}
checkRangeAndAddToSet(charSetAST, set, state.prevCodePoint, codePoint, caseInsensitive);
checkRangeAndAddToSet(charSetAST, set, state.prevCodePoint, codePoint);
state = CharSetParseState.NONE;
}
else {
Expand Down Expand Up @@ -567,60 +567,71 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse
}

private void checkCharAndAddToSet(GrammarAST ast, IntervalSet set, int c) {
checkRangeAndAddToSet(ast, set, c, c, caseInsensitive);
checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, null);
}

private void checkRangeAndAddToSet(GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive) {
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(a, b, g, ast);
private void checkRangeAndAddToSet(GrammarAST mainAst, IntervalSet set, int a, int b) {
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, null);
}

private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, CharactersDataCheckStatus previousStatus) {
CharactersDataCheckStatus status;
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(a, b, g, ast,
previousStatus == null || !previousStatus.notImpliedCharacters);
if (caseInsensitive) {
if (charactersData.lowerFrom == charactersData.upperFrom && charactersData.lowerTo == charactersData.upperTo ||
charactersData.mixOfLowerAndUpperCharCase
) {
checkRangeAndAddToSet(ast, set, a, b, false);
status = new CharactersDataCheckStatus(false, charactersData.mixOfLowerAndUpperCharCase);
if (charactersData.isSingleRange()) {
status = checkRangeAndAddToSet(rootAst, ast, set, a, b, false, status);
}
else {
checkRangeAndAddToSet(ast, set, charactersData.lowerFrom, charactersData.lowerTo, false);
checkRangeAndAddToSet(ast, set, charactersData.upperFrom, charactersData.upperTo, false);
status = checkRangeAndAddToSet(rootAst, ast, set, charactersData.lowerFrom, charactersData.lowerTo, false, status);
// Don't report similar warning twice
status = checkRangeAndAddToSet(rootAst, ast, set, charactersData.upperFrom, charactersData.upperTo, false, status);
}
}
else {
for (int i = a; i <= b; i++) {
if (set.contains(i)) {
String setText;
if (ast.getChildren() == null) {
setText = ast.getText();
}
else {
StringBuilder sb = new StringBuilder();
for (Object child : ast.getChildren()) {
if (child instanceof RangeAST) {
sb.append(((RangeAST) child).getChild(0).getText());
sb.append("..");
sb.append(((RangeAST) child).getChild(1).getText());
}
else {
sb.append(((GrammarAST) child).getText());
boolean charactersCollision = previousStatus != null && previousStatus.collision;
if (!charactersCollision) {
for (int i = a; i <= b; i++) {
if (set.contains(i)) {
String setText;
if (rootAst.getChildren() == null) {
setText = rootAst.getText();
}
else {
StringBuilder sb = new StringBuilder();
for (Object child : rootAst.getChildren()) {
if (child instanceof RangeAST) {
sb.append(((RangeAST) child).getChild(0).getText());
sb.append("..");
sb.append(((RangeAST) child).getChild(1).getText());
}
else {
sb.append(((GrammarAST) child).getText());
}
sb.append(" | ");
}
sb.append(" | ");
sb.replace(sb.length() - 3, sb.length(), "");
setText = sb.toString();
}
sb.replace(sb.length() - 3, sb.length(), "");
setText = sb.toString();
String charsString = a == b ? String.valueOf((char)a) : (char) a + "-" + (char) b;
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
charsString, setText);
charactersCollision = true;
break;
}
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
(char) a + "-" + (char) b, setText);
break;
}
}
status = new CharactersDataCheckStatus(charactersCollision, charactersData.mixOfLowerAndUpperCharCase);
set.add(a, b);
}
return status;
}

private Transition createTransition(ATNState target, int from, int to, CommonTree tree) {
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree);
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree, true);
if (caseInsensitive) {
if (charactersData.lowerFrom == charactersData.upperFrom && charactersData.lowerTo == charactersData.upperTo ||
charactersData.mixOfLowerAndUpperCharCase
) {
if (charactersData.isSingleRange()) {
return CodePointTransitions.createWithCodePointRange(target, from, to);
}
else {
Expand Down
25 changes: 17 additions & 8 deletions tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
import org.antlr.v4.tool.Grammar;

public class RangeBorderCharactersData {
public int lowerFrom;
public int upperFrom;
public int lowerTo;
public int upperTo;
public boolean mixOfLowerAndUpperCharCase;
public final int lowerFrom;
public final int upperFrom;
public final int lowerTo;
public final int upperTo;
public final boolean mixOfLowerAndUpperCharCase;

public RangeBorderCharactersData(int lowerFrom, int upperFrom, int lowerTo, int upperTo, boolean mixOfLowerAndUpperCharCase) {
this.lowerFrom = lowerFrom;
Expand All @@ -19,18 +19,21 @@ public RangeBorderCharactersData(int lowerFrom, int upperFrom, int lowerTo, int
this.mixOfLowerAndUpperCharCase = mixOfLowerAndUpperCharCase;
}

public static RangeBorderCharactersData getAndCheckCharactersData(int from, int to, Grammar grammar, CommonTree tree) {
public static RangeBorderCharactersData getAndCheckCharactersData(int from, int to, Grammar grammar, CommonTree tree,
boolean reportRangeContainsNotImpliedCharacters
) {
int lowerFrom = Character.toLowerCase(from);
int upperFrom = Character.toUpperCase(from);
int lowerTo = Character.toLowerCase(to);
int upperTo = Character.toUpperCase(to);

boolean isLowerFrom = lowerFrom == from;
boolean isLowerTo = lowerTo == to;
boolean mixOfLowerAndUpperCharCase = isLowerFrom && !isLowerTo || !isLowerFrom && isLowerTo;
if (mixOfLowerAndUpperCharCase) {
if (reportRangeContainsNotImpliedCharacters && mixOfLowerAndUpperCharCase && from <= 0x7F && to <= 0x7F) {
StringBuilder notImpliedCharacters = new StringBuilder();
for (int i = from; i < to; i++) {
if (Character.toLowerCase(i) == Character.toUpperCase(i)) {
if (!Character.isAlphabetic(i)) {
notImpliedCharacters.append((char)i);
}
}
Expand All @@ -41,4 +44,10 @@ public static RangeBorderCharactersData getAndCheckCharactersData(int from, int
}
return new RangeBorderCharactersData(lowerFrom, upperFrom, lowerTo, upperTo, mixOfLowerAndUpperCharCase);
}

public boolean isSingleRange() {
return lowerFrom == upperFrom && lowerTo == upperTo ||
mixOfLowerAndUpperCharCase ||
lowerTo - lowerFrom != upperTo - upperFrom;
}
}