From c75ee4e3c4a32c8eb57e17826e3adc2c9cfda629 Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 26 Dec 2021 17:23:05 +0300 Subject: [PATCH 1/4] Improve location for RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS, CHARACTERS_COLLISION_IN_SET warnings Don't report twice similar CHARACTERS_COLLISION_IN_SET warnings if caseInsensitive option enabled --- .../antlr/v4/test/tool/TestSymbolIssues.java | 6 +- .../antlr/v4/automata/LexerATNFactory.java | 76 ++++++++++--------- .../automata/RangeBorderCharactersData.java | 15 ++-- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index 2bec23ae46..2d9ab85680 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -394,7 +394,7 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars a-f used multiple times in set [aa-f]\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars D-J used multiple times in set [A-FD-J]\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars O-V used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:38: chars O-V used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n" }; @@ -411,9 +411,7 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { "TOKEN_RANGE_3: 'm'..'q' | [M-Q];\n", "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars a-f used multiple times in set [a-fA-F0-9]\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars A-F used multiple times in set [a-fA-F0-9]\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars g-l used multiple times in set 'g'..'l' | 'G'..'L'\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars G-L used multiple times in set 'g'..'l' | 'G'..'L'\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:32: chars g-l used multiple times in set 'g'..'l' | 'G'..'L'\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'M' used multiple times in set 'M'..'Q' | 'm'..'q'\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'm' used multiple times in set 'M'..'Q' | 'm'..'q'\n" }; diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index 7cda1e9788..65d65afd26 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -279,7 +279,7 @@ public Handle set(GrammarAST associatedAST, List alts, boolean inver int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText()); int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText()); if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) { - checkRangeAndAddToSet(associatedAST, set, a, b, caseInsensitive); + checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, true); } } else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) { @@ -517,7 +517,7 @@ private CharSetParseState applyPrevStateAndMoveToCodePoint( charSetAST.getToken(), CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint)); } - checkRangeAndAddToSet(charSetAST, set, state.prevCodePoint, codePoint, caseInsensitive); + checkRangeAndAddToSet(charSetAST, set, state.prevCodePoint, codePoint); state = CharSetParseState.NONE; } else { @@ -567,60 +567,66 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse } private void checkCharAndAddToSet(GrammarAST ast, IntervalSet set, int c) { - checkRangeAndAddToSet(ast, set, c, c, caseInsensitive); + checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, true); } - private void checkRangeAndAddToSet(GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive) { + private void checkRangeAndAddToSet(GrammarAST mainAst, IntervalSet set, int a, int b) { + checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, true); + } + + private boolean checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, boolean reportCollision) { + boolean charactersCollision = false; RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(a, b, g, ast); if (caseInsensitive) { - if (charactersData.lowerFrom == charactersData.upperFrom && charactersData.lowerTo == charactersData.upperTo || - charactersData.mixOfLowerAndUpperCharCase - ) { - checkRangeAndAddToSet(ast, set, a, b, false); + if (charactersData.isSingleRange()) { + checkRangeAndAddToSet(rootAst, ast, set, a, b, false, true); } else { - checkRangeAndAddToSet(ast, set, charactersData.lowerFrom, charactersData.lowerTo, false); - checkRangeAndAddToSet(ast, set, charactersData.upperFrom, charactersData.upperTo, false); + charactersCollision = checkRangeAndAddToSet(rootAst, ast, set, charactersData.lowerFrom, charactersData.lowerTo, false, true); + // Don't report similar warning twice + checkRangeAndAddToSet(rootAst, ast, set, charactersData.upperFrom, charactersData.upperTo, false, !charactersCollision); } } else { - for (int i = a; i <= b; i++) { - if (set.contains(i)) { - String setText; - if (ast.getChildren() == null) { - setText = ast.getText(); - } - else { - StringBuilder sb = new StringBuilder(); - for (Object child : ast.getChildren()) { - if (child instanceof RangeAST) { - sb.append(((RangeAST) child).getChild(0).getText()); - sb.append(".."); - sb.append(((RangeAST) child).getChild(1).getText()); - } - else { - sb.append(((GrammarAST) child).getText()); + if (reportCollision) { + for (int i = a; i <= b; i++) { + if (set.contains(i)) { + String setText; + if (rootAst.getChildren() == null) { + setText = rootAst.getText(); + } + else { + StringBuilder sb = new StringBuilder(); + for (Object child : rootAst.getChildren()) { + if (child instanceof RangeAST) { + sb.append(((RangeAST) child).getChild(0).getText()); + sb.append(".."); + sb.append(((RangeAST) child).getChild(1).getText()); + } + else { + sb.append(((GrammarAST) child).getText()); + } + sb.append(" | "); } - sb.append(" | "); + sb.replace(sb.length() - 3, sb.length(), ""); + setText = sb.toString(); } - sb.replace(sb.length() - 3, sb.length(), ""); - setText = sb.toString(); + g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(), + (char) a + "-" + (char) b, setText); + charactersCollision = true; + break; } - g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(), - (char) a + "-" + (char) b, setText); - break; } } set.add(a, b); } + return charactersCollision; } private Transition createTransition(ATNState target, int from, int to, CommonTree tree) { RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree); if (caseInsensitive) { - if (charactersData.lowerFrom == charactersData.upperFrom && charactersData.lowerTo == charactersData.upperTo || - charactersData.mixOfLowerAndUpperCharCase - ) { + if (charactersData.isSingleRange()) { return CodePointTransitions.createWithCodePointRange(target, from, to); } else { diff --git a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java index d682b41bf8..d939e97587 100644 --- a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java +++ b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java @@ -5,11 +5,11 @@ import org.antlr.v4.tool.Grammar; public class RangeBorderCharactersData { - public int lowerFrom; - public int upperFrom; - public int lowerTo; - public int upperTo; - public boolean mixOfLowerAndUpperCharCase; + public final int lowerFrom; + public final int upperFrom; + public final int lowerTo; + public final int upperTo; + public final boolean mixOfLowerAndUpperCharCase; public RangeBorderCharactersData(int lowerFrom, int upperFrom, int lowerTo, int upperTo, boolean mixOfLowerAndUpperCharCase) { this.lowerFrom = lowerFrom; @@ -24,6 +24,7 @@ public static RangeBorderCharactersData getAndCheckCharactersData(int from, int int upperFrom = Character.toUpperCase(from); int lowerTo = Character.toLowerCase(to); int upperTo = Character.toUpperCase(to); + boolean isLowerFrom = lowerFrom == from; boolean isLowerTo = lowerTo == to; boolean mixOfLowerAndUpperCharCase = isLowerFrom && !isLowerTo || !isLowerFrom && isLowerTo; @@ -41,4 +42,8 @@ public static RangeBorderCharactersData getAndCheckCharactersData(int from, int } return new RangeBorderCharactersData(lowerFrom, upperFrom, lowerTo, upperTo, mixOfLowerAndUpperCharCase); } + + public boolean isSingleRange() { + return lowerFrom == upperFrom && lowerTo == upperTo || mixOfLowerAndUpperCharCase; + } } From 4fb4edf0d248aafbc75ba6d7bf5b714175f5fd4c Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 26 Dec 2021 17:26:23 +0300 Subject: [PATCH 2/4] Consider length of lower and UPPER character ranges during case transformation Length should be equal --- .../antlr/v4/test/tool/TestSymbolIssues.java | 18 ++++++++++++++++++ .../v4/automata/RangeBorderCharactersData.java | 4 +++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index 2d9ab85680..20872fec46 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -419,6 +419,24 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { testErrors(test, false); } + @Test public void testCaseInsensitiveWithUnicodeRanges() { + String[] test = { + "lexer grammar L;\n" + + "options { caseInsensitive=true; }\n" + + "FullWidthLetter\n" + + " : '\\u00c0'..'\\u00d6' // ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ\n" + + " | '\\u00f8'..'\\u00ff' // øùúûüýþÿ\n" + + " ;", + + "" + }; + + // Don't transform øùúûüýþÿ to uppercase + // ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸ + // because of different length of lower and UPPER range + testErrors(test, false); + } + @Test public void testUnreachableTokens() { String[] test = { "lexer grammar Test;\n" + diff --git a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java index d939e97587..5febbfd666 100644 --- a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java +++ b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java @@ -44,6 +44,8 @@ public static RangeBorderCharactersData getAndCheckCharactersData(int from, int } public boolean isSingleRange() { - return lowerFrom == upperFrom && lowerTo == upperTo || mixOfLowerAndUpperCharCase; + return lowerFrom == upperFrom && lowerTo == upperTo || + mixOfLowerAndUpperCharCase || + lowerTo - lowerFrom != upperTo - upperFrom; } } From e6578798be7d20073d3eecbf5bfcd5b7dde07f46 Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 26 Dec 2021 19:16:14 +0300 Subject: [PATCH 3/4] Enable RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS only for ASCII characters --- .../test/org/antlr/v4/test/tool/TestSymbolIssues.java | 3 ++- tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index 20872fec46..e51edb65a9 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -489,7 +489,8 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { "lexer grammar Test;\n" + "TOKEN1: 'A'..'g';\n" + "TOKEN2: [C-m];\n" + - "TOKEN3: [А-я]; // OK since range does not contain intermediate characters", + "TOKEN3: [А-я]; // OK since range does not contain intermediate characters\n" + + "TOKEN4: '\\u0100'..'\\u1fff'; // OK since range borders are unicode characters", "warning(" + ErrorType.RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS.code + "): Test.g4:2:8: Range A..g probably contains not implied characters [\\]^_`. Both bounds should be defined in lower or UPPER case\n" + "warning(" + ErrorType.RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS.code + "): Test.g4:3:8: Range C..m probably contains not implied characters [\\]^_`. Both bounds should be defined in lower or UPPER case\n" diff --git a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java index 5febbfd666..7c30c0feb6 100644 --- a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java +++ b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java @@ -28,10 +28,10 @@ public static RangeBorderCharactersData getAndCheckCharactersData(int from, int boolean isLowerFrom = lowerFrom == from; boolean isLowerTo = lowerTo == to; boolean mixOfLowerAndUpperCharCase = isLowerFrom && !isLowerTo || !isLowerFrom && isLowerTo; - if (mixOfLowerAndUpperCharCase) { + if (mixOfLowerAndUpperCharCase && from <= 0x7F && to <= 0x7F) { StringBuilder notImpliedCharacters = new StringBuilder(); for (int i = from; i < to; i++) { - if (Character.toLowerCase(i) == Character.toUpperCase(i)) { + if (!Character.isAlphabetic(i)) { notImpliedCharacters.append((char)i); } } From df62fba4b053f0255b80e4b06f904d31c658fb6a Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 26 Dec 2021 19:43:06 +0300 Subject: [PATCH 4/4] Don't report RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS twice Add CharactersDataCheckStatus --- .../antlr/v4/test/tool/TestSymbolIssues.java | 12 +++++++ .../automata/CharactersDataCheckStatus.java | 11 +++++++ .../antlr/v4/automata/LexerATNFactory.java | 31 +++++++++++-------- .../automata/RangeBorderCharactersData.java | 6 ++-- 4 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 tool/src/org/antlr/v4/automata/CharactersDataCheckStatus.java diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index e51edb65a9..14ae5eff61 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -498,4 +498,16 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { testErrors(test, false); } + + @Test public void testNotImpliedCharactersWithCaseInsensitiveOption() { + String[] test = { + "lexer grammar Test;\n" + + "options { caseInsensitive=true; }\n" + + "TOKEN: [A-z];", + + "warning(" + ErrorType.RANGE_PROBABLY_CONTAINS_NOT_IMPLIED_CHARACTERS.code + "): Test.g4:3:7: Range A..z probably contains not implied characters [\\]^_`. Both bounds should be defined in lower or UPPER case\n" + }; + + testErrors(test, false); + } } diff --git a/tool/src/org/antlr/v4/automata/CharactersDataCheckStatus.java b/tool/src/org/antlr/v4/automata/CharactersDataCheckStatus.java new file mode 100644 index 0000000000..be0ffcf5d9 --- /dev/null +++ b/tool/src/org/antlr/v4/automata/CharactersDataCheckStatus.java @@ -0,0 +1,11 @@ +package org.antlr.v4.automata; + +public class CharactersDataCheckStatus { + public final boolean collision; + public final boolean notImpliedCharacters; + + public CharactersDataCheckStatus(boolean collision, boolean notImpliedCharacters) { + this.collision = collision; + this.notImpliedCharacters = notImpliedCharacters; + } +} diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index 65d65afd26..5f39325d67 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -279,7 +279,7 @@ public Handle set(GrammarAST associatedAST, List alts, boolean inver int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText()); int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText()); if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) { - checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, true); + checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, null); } } else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) { @@ -567,28 +567,31 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse } private void checkCharAndAddToSet(GrammarAST ast, IntervalSet set, int c) { - checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, true); + checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, null); } private void checkRangeAndAddToSet(GrammarAST mainAst, IntervalSet set, int a, int b) { - checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, true); + checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, null); } - private boolean checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, boolean reportCollision) { - boolean charactersCollision = false; - RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(a, b, g, ast); + private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, CharactersDataCheckStatus previousStatus) { + CharactersDataCheckStatus status; + RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(a, b, g, ast, + previousStatus == null || !previousStatus.notImpliedCharacters); if (caseInsensitive) { + status = new CharactersDataCheckStatus(false, charactersData.mixOfLowerAndUpperCharCase); if (charactersData.isSingleRange()) { - checkRangeAndAddToSet(rootAst, ast, set, a, b, false, true); + status = checkRangeAndAddToSet(rootAst, ast, set, a, b, false, status); } else { - charactersCollision = checkRangeAndAddToSet(rootAst, ast, set, charactersData.lowerFrom, charactersData.lowerTo, false, true); + status = checkRangeAndAddToSet(rootAst, ast, set, charactersData.lowerFrom, charactersData.lowerTo, false, status); // Don't report similar warning twice - checkRangeAndAddToSet(rootAst, ast, set, charactersData.upperFrom, charactersData.upperTo, false, !charactersCollision); + status = checkRangeAndAddToSet(rootAst, ast, set, charactersData.upperFrom, charactersData.upperTo, false, status); } } else { - if (reportCollision) { + boolean charactersCollision = previousStatus != null && previousStatus.collision; + if (!charactersCollision) { for (int i = a; i <= b; i++) { if (set.contains(i)) { String setText; @@ -611,20 +614,22 @@ private boolean checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, Interv sb.replace(sb.length() - 3, sb.length(), ""); setText = sb.toString(); } + String charsString = a == b ? String.valueOf((char)a) : (char) a + "-" + (char) b; g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(), - (char) a + "-" + (char) b, setText); + charsString, setText); charactersCollision = true; break; } } } + status = new CharactersDataCheckStatus(charactersCollision, charactersData.mixOfLowerAndUpperCharCase); set.add(a, b); } - return charactersCollision; + return status; } private Transition createTransition(ATNState target, int from, int to, CommonTree tree) { - RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree); + RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree, true); if (caseInsensitive) { if (charactersData.isSingleRange()) { return CodePointTransitions.createWithCodePointRange(target, from, to); diff --git a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java index 7c30c0feb6..4bc44adf60 100644 --- a/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java +++ b/tool/src/org/antlr/v4/automata/RangeBorderCharactersData.java @@ -19,7 +19,9 @@ public RangeBorderCharactersData(int lowerFrom, int upperFrom, int lowerTo, int this.mixOfLowerAndUpperCharCase = mixOfLowerAndUpperCharCase; } - public static RangeBorderCharactersData getAndCheckCharactersData(int from, int to, Grammar grammar, CommonTree tree) { + public static RangeBorderCharactersData getAndCheckCharactersData(int from, int to, Grammar grammar, CommonTree tree, + boolean reportRangeContainsNotImpliedCharacters + ) { int lowerFrom = Character.toLowerCase(from); int upperFrom = Character.toUpperCase(from); int lowerTo = Character.toLowerCase(to); @@ -28,7 +30,7 @@ public static RangeBorderCharactersData getAndCheckCharactersData(int from, int boolean isLowerFrom = lowerFrom == from; boolean isLowerTo = lowerTo == to; boolean mixOfLowerAndUpperCharCase = isLowerFrom && !isLowerTo || !isLowerFrom && isLowerTo; - if (mixOfLowerAndUpperCharCase && from <= 0x7F && to <= 0x7F) { + if (reportRangeContainsNotImpliedCharacters && mixOfLowerAndUpperCharCase && from <= 0x7F && to <= 0x7F) { StringBuilder notImpliedCharacters = new StringBuilder(); for (int i = from; i < to; i++) { if (!Character.isAlphabetic(i)) {