Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand accent support #57

Merged
merged 6 commits into from
Apr 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

- Update LanguageTool to 5.3 (see [LT 5.3 release notes](https://github.com/languagetool-org/languagetool/blob/v5.3/languagetool-standalone/CHANGES.md#53-2021-03-29))
- Update LSP4J to 0.12.0
- Add support for many L<sup>A</sup>T<sub>E</sub>X accents and enhance coverage of existing accents: double acute (e.g., `\H{O}`, &#x0150;), line below (e.g., `\b{h}`, &#x1e96;), cedilla (e.g., `\c{E}`, &#x0228;), dot below (e.g., `\d{A}`, &#x1ea0;), ogonek (e.g., `\k{A}`, &#x0104;), breve (e.g., `\u{A}`, &#x0102;), and caron (e.g., `\v{C}`, &#x010c;) ([PR #57](https://github.com/valentjn/ltex-ls/pull/57) by [@ed359](https://github.com/ed359), fixes [#56](https://github.com/valentjn/ltex-ls/issues/56))
- Add support for special L<sup>A</sup>T<sub>E</sub>X characters: `\L` (&#x0141;), `\SS` (&#x1e9e;), `\i` (&#x0131;), `\j` (&#x0237;), `\l` (&#x0142;) ([PR #57](https://github.com/valentjn/ltex-ls/pull/57) by [@ed359](https://github.com/ed359), fixes [#56](https://github.com/valentjn/ltex-ls/issues/56))
- Add `FR_SPELLING_RULE` as a rule for unknown words ([PR #47](https://github.com/valentjn/ltex-ls/pull/47) by [Nicolas Sicard (@biozic)](https://github.com/biozic))
- Ignore non-object values for `InitializeParams.initializationOptions` (fixes [#65](https://github.com/valentjn/ltex-ls/issues/65))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ private enum Mode {
private static final Pattern emDashPattern = Pattern.compile("^---");
private static final Pattern enDashPattern = Pattern.compile("^--");
private static final Pattern accentPattern1 = Pattern.compile(
"^(\\\\[`'\\^~\"=\\.])(([A-Za-z]|\\\\i)|(\\{([A-Za-z]|\\\\i)\\}))");
"^(\\\\[`'\\^~\"=\\.])(([A-Za-z]|\\\\i|\\\\j)|(\\{([A-Za-z]|\\\\i|\\\\j)\\}))");
private static final Pattern accentPattern2 = Pattern.compile(
"^(\\\\[cr])( *([A-Za-z])|\\{([A-Za-z])\\})");
"^(\\\\[Hbcdkruv])( *([A-Za-z]|\\\\i|\\\\j)|\\{([A-Za-z]|\\\\i|\\\\j)\\})");
private static final Pattern displayMathPattern = Pattern.compile("^\\$\\$");
private static final Pattern verbCommandPattern = Pattern.compile("^\\\\verb\\*?(.).*?\\1");
private static final Pattern rsweaveBeginPattern = Pattern.compile("^<<.*?>>=");
Expand Down Expand Up @@ -446,15 +446,35 @@ private void processBackslash() {
popMode();
addMarkup(command, generateDummy());
} else if (command.equals("\\AA")) {
// capital A with ring above
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00c5"));
} else if (command.equals("\\L")) {
// capital L with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0141"));
} else if (command.equals("\\O")) {
// capital O with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00d8"));
} else if (command.equals("\\SS")) {
// capital sharp S
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u1e9e"));
} else if (command.equals("\\aa")) {
// small a with ring above
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00e5"));
} else if (command.equals("\\ss")) {
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00df"));
} else if (command.equals("\\i")) {
// small i without dot
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0131"));
} else if (command.equals("\\j")) {
// small j without dot
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0237"));
} else if (command.equals("\\l")) {
// small l with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0142"));
} else if (command.equals("\\o")) {
// small o with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00f8"));
} else if (command.equals("\\ss")) {
// small sharp s
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00df"));
} else if (command.equals("\\`") || command.equals("\\'") || command.equals("\\^")
|| command.equals("\\~") || command.equals("\\\"") || command.equals("\\=")
|| command.equals("\\.")) {
Expand All @@ -470,7 +490,9 @@ private void processBackslash() {
} else {
addMarkup(command);
}
} else if (command.equals("\\c") || command.equals("\\r")) {
} else if (command.equals("\\H") || command.equals("\\b") || command.equals("\\c")
|| command.equals("\\d") || command.equals("\\k") || command.equals("\\r")
|| command.equals("\\u") || command.equals("\\v")) {
Matcher matcher = accentPattern2.matcher(this.code.substring(this.pos));

if (!isMathMode(this.curMode) && matcher.find()) {
Expand Down Expand Up @@ -889,6 +911,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
String unicode = "";

switch (accentCommand.charAt(1)) {
// grave
case '`': {
if (letter.equals("A")) unicode = "\u00c0";
else if (letter.equals("E")) unicode = "\u00c8";
Expand All @@ -902,6 +925,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("u")) unicode = "\u00f9";
break;
}
// acute
case '\'': {
if (letter.equals("A")) unicode = "\u00c1";
else if (letter.equals("E")) unicode = "\u00c9";
Expand All @@ -917,6 +941,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("y")) unicode = "\u00fd";
break;
}
// circumflex
case '^': {
if (letter.equals("A")) unicode = "\u00c2";
else if (letter.equals("E")) unicode = "\u00ca";
Expand All @@ -927,11 +952,13 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("a")) unicode = "\u00e2";
else if (letter.equals("e")) unicode = "\u00ea";
else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u00ee";
else if (letter.equals("j") || letter.equals("\\j")) unicode = "\u0135";
else if (letter.equals("o")) unicode = "\u00f4";
else if (letter.equals("u")) unicode = "\u00fb";
else if (letter.equals("y")) unicode = "\u0177";
break;
}
// tilde
case '~': {
if (letter.equals("A")) unicode = "\u00c3";
else if (letter.equals("E")) unicode = "\u1ebc";
Expand All @@ -947,6 +974,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("u")) unicode = "\u0169";
break;
}
// diaeresis/umlaut
case '"': {
if (letter.equals("A")) unicode = "\u00c4";
else if (letter.equals("E")) unicode = "\u00cb";
Expand All @@ -962,6 +990,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("y")) unicode = "\u00ff";
break;
}
// macron
case '=': {
if (letter.equals("A")) unicode = "\u0100";
else if (letter.equals("E")) unicode = "\u0112";
Expand All @@ -977,6 +1006,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("y")) unicode = "\u0233";
break;
}
// dot above
case '.': {
if (letter.equals("A")) unicode = "\u0226";
else if (letter.equals("E")) unicode = "\u0116";
Expand All @@ -987,18 +1017,139 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("o")) unicode = "\u022f";
break;
}
// double acute
case 'H': {
if (letter.equals("O")) unicode = "\u0150";
else if (letter.equals("U")) unicode = "\u0170";
else if (letter.equals("o")) unicode = "\u0151";
else if (letter.equals("u")) unicode = "\u0171";
break;
}
// line below
case 'b': {
if (letter.equals("B")) unicode = "\u1e06";
else if (letter.equals("D")) unicode = "\u1e0e";
else if (letter.equals("K")) unicode = "\u1e34";
else if (letter.equals("L")) unicode = "\u1e3a";
else if (letter.equals("N")) unicode = "\u1e48";
else if (letter.equals("R")) unicode = "\u1e5e";
else if (letter.equals("T")) unicode = "\u1e6e";
else if (letter.equals("Z")) unicode = "\u1e94";
else if (letter.equals("b")) unicode = "\u1e07";
else if (letter.equals("d")) unicode = "\u1e0f";
else if (letter.equals("h")) unicode = "\u1e96";
else if (letter.equals("k")) unicode = "\u1e35";
else if (letter.equals("l")) unicode = "\u1e3b";
else if (letter.equals("n")) unicode = "\u1e49";
else if (letter.equals("r")) unicode = "\u1e5f";
else if (letter.equals("t")) unicode = "\u1e6f";
else if (letter.equals("z")) unicode = "\u1e95";
break;
}
// cedilla
case 'c': {
if (letter.equals("C")) unicode = "\u00c7";
else if (letter.equals("D")) unicode = "\u1e10";
else if (letter.equals("E")) unicode = "\u0228";
else if (letter.equals("G")) unicode = "\u0122";
else if (letter.equals("H")) unicode = "\u1e28";
else if (letter.equals("K")) unicode = "\u0136";
else if (letter.equals("L")) unicode = "\u013b";
else if (letter.equals("N")) unicode = "\u0145";
else if (letter.equals("R")) unicode = "\u0156";
else if (letter.equals("S")) unicode = "\u015e";
else if (letter.equals("T")) unicode = "\u0162";
else if (letter.equals("c")) unicode = "\u00e7";
else if (letter.equals("d")) unicode = "\u1e11";
else if (letter.equals("e")) unicode = "\u0229";
else if (letter.equals("g")) unicode = "\u0123";
else if (letter.equals("h")) unicode = "\u1e29";
else if (letter.equals("k")) unicode = "\u0137";
else if (letter.equals("l")) unicode = "\u013c";
else if (letter.equals("n")) unicode = "\u0146";
else if (letter.equals("r")) unicode = "\u0157";
else if (letter.equals("s")) unicode = "\u015f";
else if (letter.equals("t")) unicode = "\u0163";
break;
}
// dot below
case 'd': {
if (letter.equals("A")) unicode = "\u1ea0";
else if (letter.equals("E")) unicode = "\u1eb8";
else if (letter.equals("I")) unicode = "\u1eca";
else if (letter.equals("O")) unicode = "\u1ecc";
else if (letter.equals("U")) unicode = "\u1ee4";
else if (letter.equals("Y")) unicode = "\u1ef4";
else if (letter.equals("a")) unicode = "\u1ea1";
else if (letter.equals("e")) unicode = "\u1eb9";
else if (letter.equals("i")) unicode = "\u1ecb";
else if (letter.equals("o")) unicode = "\u1ecd";
else if (letter.equals("u")) unicode = "\u1ee5";
else if (letter.equals("y")) unicode = "\u1ef5";
break;
}
// ogonek
case 'k': {
if (letter.equals("A")) unicode = "\u0104";
else if (letter.equals("E")) unicode = "\u0118";
else if (letter.equals("I")) unicode = "\u012e";
else if (letter.equals("O")) unicode = "\u01ea";
else if (letter.equals("U")) unicode = "\u0172";
else if (letter.equals("a")) unicode = "\u0105";
else if (letter.equals("e")) unicode = "\u0119";
else if (letter.equals("i")) unicode = "\u012f";
else if (letter.equals("o")) unicode = "\u01eb";
else if (letter.equals("u")) unicode = "\u0173";
break;
}
// ring above
case 'r': {
if (letter.equals("A")) unicode = "\u00c5";
else if (letter.equals("U")) unicode = "\u016e";
else if (letter.equals("a")) unicode = "\u00e5";
else if (letter.equals("u")) unicode = "\u016f";
break;
}
// breve
case 'u': {
if (letter.equals("A")) unicode = "\u0102";
else if (letter.equals("E")) unicode = "\u0114";
else if (letter.equals("G")) unicode = "\u011e";
else if (letter.equals("I")) unicode = "\u012c";
else if (letter.equals("O")) unicode = "\u014e";
else if (letter.equals("U")) unicode = "\u016c";
else if (letter.equals("a")) unicode = "\u0103";
else if (letter.equals("e")) unicode = "\u0115";
else if (letter.equals("g")) unicode = "\u011f";
else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u012d";
else if (letter.equals("o")) unicode = "\u014f";
else if (letter.equals("u")) unicode = "\u016d";
break;
}
// caron
case 'v': {
if (letter.equals("C")) unicode = "\u010c";
else if (letter.equals("D")) unicode = "\u010e";
else if (letter.equals("E")) unicode = "\u011a";
else if (letter.equals("L")) unicode = "\u013d";
else if (letter.equals("N")) unicode = "\u0147";
else if (letter.equals("R")) unicode = "\u0158";
else if (letter.equals("S")) unicode = "\u0160";
else if (letter.equals("T")) unicode = "\u0164";
else if (letter.equals("Z")) unicode = "\u017d";
else if (letter.equals("c")) unicode = "\u010d";
else if (letter.equals("d")) unicode = "\u010f";
else if (letter.equals("e")) unicode = "\u011b";
else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u01d0";
else if (letter.equals("j") || letter.equals("\\j")) unicode = "\u01f0";
else if (letter.equals("l")) unicode = "\u013e";
else if (letter.equals("n")) unicode = "\u0148";
else if (letter.equals("r")) unicode = "\u0159";
else if (letter.equals("s")) unicode = "\u0161";
else if (letter.equals("t")) unicode = "\u0165";
else if (letter.equals("z")) unicode = "\u017e";
break;
}
default: {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,25 +91,98 @@ public void testTextMode() {
"\u00cb\u00efn T\u00ebxt m\u00eft v\u00ef\u00ebl\u00ebn "
+ "\u00dcml\u00e4\u00fct\u00ebn. ");
assertPlainText(
"\\AA\\O\\aa\\ss\\o"
+ "\\`A\\`E\\`I\\`O\\`U\\`a\\`e\\`i\\`\\i\\`o\\`u"
+ "\\'A\\'E\\'I\\'O\\'U\\'Y\\'a\\'e\\'i\\'\\i\\'o\\'u\\'y"
+ "\\^A\\^E\\^I\\^O\\^U\\^Y\\^a\\^e\\^i\\^\\i\\^o\\^u\\^y"
+ "\\~A\\~E\\~I\\~N\\~O\\~U\\~a\\~e\\~i\\~\\i\\~n\\~o\\~u"
+ "\\\"A\\\"E\\\"I\\\"O\\\"U\\\"Y\\\"a\\\"e\\\"i\\\"\\i\\\"o\\\"u\\\"y"
+ "\\=A\\=E\\=I\\=O\\=U\\=Y\\=a\\=e\\=i\\=\\i\\=o\\=u\\=y"
+ "\\.A\\.E\\.I\\.O\\.a\\.e\\.o",
"\u00c5\u00d8\u00e5\u00df\u00f8"
+ "\u00c0\u00c8\u00cc\u00d2\u00d9\u00e0\u00e8\u00ec\u00ec\u00f2\u00f9"
+ "\u00c1\u00c9\u00cd\u00d3\u00da\u00dd\u00e1\u00e9\u00ed\u00ed\u00f3\u00fa\u00fd"
+ "\u00c2\u00ca\u00ce\u00d4\u00db\u0176\u00e2\u00ea\u00ee\u00ee\u00f4\u00fb\u0177"
+ "\u00c3\u1ebc\u0128\u00d1\u00d5\u0168\u00e3\u1ebd\u0129\u0129\u00f1\u00f5\u0169"
+ "\u00c4\u00cb\u00cf\u00d6\u00dc\u0178\u00e4\u00eb\u00ef\u00ef\u00f6\u00fc\u00ff"
+ "\u0100\u0112\u012a\u014c\u016a\u0232\u0101\u0113\u012b\u012b\u014d\u016b\u0233"
+ "\u0226\u0116\u0130\u022e\u0227\u0117\u022f");
assertPlainText(
"\\c{C}\\c c\\r{A}\\r U\\r a\\r u",
"\u00c7\u00e7\u00c5\u016e\u00e5\u016f");
"\\AA\\L\\O\\SS",
"\u00c5\u0141\u00d8\u1e9e");
assertPlainText(
"\\aa\\i\\j\\l\\o\\ss",
"\u00e5\u0131\u0237\u0142\u00f8\u00df");
assertPlainText(
"\\`A\\`E\\`I\\`O\\`U",
"\u00c0\u00c8\u00cc\u00d2\u00d9");
assertPlainText(
"\\`a\\`e\\`i\\`\\i\\`o\\`u",
"\u00e0\u00e8\u00ec\u00ec\u00f2\u00f9");
assertPlainText(
"\\'A\\'E\\'I\\'O\\'U\\'Y",
"\u00c1\u00c9\u00cd\u00d3\u00da\u00dd");
assertPlainText(
"\\'a\\'e\\'i\\'\\i\\'o\\'u\\'y",
"\u00e1\u00e9\u00ed\u00ed\u00f3\u00fa\u00fd");
assertPlainText(
"\\^A\\^E\\^I\\^O\\^U\\^Y",
"\u00c2\u00ca\u00ce\u00d4\u00db\u0176");
assertPlainText(
"\\^a\\^e\\^i\\^\\i\\^j\\^\\j\\^o\\^u\\^y",
"\u00e2\u00ea\u00ee\u00ee\u0135\u0135\u00f4\u00fb\u0177");
assertPlainText(
"\\~A\\~E\\~I\\~N\\~O\\~U",
"\u00c3\u1ebc\u0128\u00d1\u00d5\u0168");
assertPlainText(
"\\~a\\~e\\~i\\~\\i\\~n\\~o\\~u",
"\u00e3\u1ebd\u0129\u0129\u00f1\u00f5\u0169");
assertPlainText(
"\\\"A\\\"E\\\"I\\\"O\\\"U\\\"Y",
"\u00c4\u00cb\u00cf\u00d6\u00dc\u0178");
assertPlainText(
"\\\"a\\\"e\\\"i\\\"\\i\\\"o\\\"u\\\"y",
"\u00e4\u00eb\u00ef\u00ef\u00f6\u00fc\u00ff");
assertPlainText(
"\\=A\\=E\\=I\\=O\\=U\\=Y",
"\u0100\u0112\u012a\u014c\u016a\u0232");
assertPlainText(
"\\=a\\=e\\=i\\=\\i\\=o\\=u\\=y",
"\u0101\u0113\u012b\u012b\u014d\u016b\u0233");
assertPlainText(
"\\.A\\.E\\.I\\.O",
"\u0226\u0116\u0130\u022e");
assertPlainText(
"\\.a\\.e\\.o",
"\u0227\u0117\u022f");
assertPlainText(
"\\H{O}\\H{U}\\H{o}\\H{u}",
"\u0150\u0170\u0151\u0171");
assertPlainText(
"\\b{B}\\b{D}\\b{K}\\b{L}\\b{N}\\b{R}\\b{T}\\b{Z}",
"\u1e06\u1e0e\u1e34\u1e3a\u1e48\u1e5e\u1e6e\u1e94");
assertPlainText(
"\\b{b}\\b{d}\\b{h}\\b{k}\\b{l}\\b{n}\\b{r}\\b{t}\\b{z}",
"\u1e07\u1e0f\u1e96\u1e35\u1e3b\u1e49\u1e5f\u1e6f\u1e95");
assertPlainText(
"\\c{C}\\c{D}\\c{E}\\c{G}\\c{H}\\c{K}\\c{L}\\c{N}\\c{R}\\c{S}\\c{T}",
"\u00c7\u1e10\u0228\u0122\u1e28\u0136\u013b\u0145\u0156\u015e\u0162");
assertPlainText(
"\\c{c}\\c{d}\\c{e}\\c{g}\\c{h}\\c{k}\\c{l}\\c{n}\\c{r}\\c{s}\\c{t}",
"\u00e7\u1e11\u0229\u0123\u1e29\u0137\u013c\u0146\u0157\u015f\u0163");
assertPlainText(
"\\d{A}\\d{E}\\d{I}\\d{O}\\d{U}\\d{Y}",
"\u1ea0\u1eb8\u1eca\u1ecc\u1ee4\u1ef4");
assertPlainText(
"\\d{a}\\d{e}\\d{i}\\d{o}\\d{u}\\d{y}",
"\u1ea1\u1eb9\u1ecb\u1ecd\u1ee5\u1ef5");
assertPlainText(
"\\k{A}\\k{E}\\k{I}\\k{O}\\k{U}",
"\u0104\u0118\u012e\u01ea\u0172");
assertPlainText(
"\\k{a}\\k{e}\\k{i}\\k{o}\\k{u}",
"\u0105\u0119\u012f\u01eb\u0173");
assertPlainText(
"\\r{A}\\r{U}",
"\u00c5\u016e");
assertPlainText(
"\\r{a}\\r{u}",
"\u00e5\u016f");
assertPlainText(
"\\u{A}\\u{E}\\u{G}\\u{I}\\u{O}\\u{U}",
"\u0102\u0114\u011e\u012c\u014e\u016c");
assertPlainText(
"\\v{C}\\v{D}\\v{E}\\v{L}\\v{N}\\v{R}\\v{S}\\v{T}\\v{Z}",
"\u010c\u010e\u011a\u013d\u0147\u0158\u0160\u0164\u017d");
assertPlainText(
"\\v{c}\\v{d}\\v{e}\\v{i}\\v{\\i}\\v{j}\\v{\\j}\\v{l}\\v{n}\\v{r}\\v{s}\\v{t}\\v{z}",
"\u010d\u010f\u011b\u01d0\u01d0\u01f0\u01f0\u013e\u0148\u0159\u0161\u0165\u017e");
assertPlainText(
"\\u{a}\\u{e}\\u{g}\\u{i}\\u{\\i}\\u{o}\\u{u}",
"\u0103\u0115\u011f\u012d\u012d\u014f\u016d");
assertPlainText(
"This is a test: a, b, \\dots, c.\n",
"This is a test: a, b, \u2026, c. ");
Expand Down