Skip to content

Commit

Permalink
Merge pull request #57 from ed359/develop
Browse files Browse the repository at this point in the history
Expand accent support
  • Loading branch information
valentjn authored Apr 4, 2021
2 parents 03cf2fb + f77b38a commit 90d7e8f
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 24 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

- Update LanguageTool to 5.3 (see [LT 5.3 release notes](https://github.com/languagetool-org/languagetool/blob/v5.3/languagetool-standalone/CHANGES.md#53-2021-03-29))
- Update LSP4J to 0.12.0
- Add support for many L<sup>A</sup>T<sub>E</sub>X accents and enhance coverage of existing accents: double acute (e.g., `\H{O}`, &#x0150;), line below (e.g., `\b{h}`, &#x1e96;), cedilla (e.g., `\c{E}`, &#x0228;), dot below (e.g., `\d{A}`, &#x1ea0;), ogonek (e.g., `\k{A}`, &#x0104;), breve (e.g., `\u{A}`, &#x0102;), and caron (e.g., `\v{C}`, &#x010c;) ([PR #57](https://github.com/valentjn/ltex-ls/pull/57) by [@ed359](https://github.com/ed359), fixes [#56](https://github.com/valentjn/ltex-ls/issues/56))
- Add support for special L<sup>A</sup>T<sub>E</sub>X characters: `\L` (&#x0141;), `\SS` (&#x1e9e;), `\i` (&#x0131;), `\j` (&#x0237;), `\l` (&#x0142;) ([PR #57](https://github.com/valentjn/ltex-ls/pull/57) by [@ed359](https://github.com/ed359), fixes [#56](https://github.com/valentjn/ltex-ls/issues/56))
- Add `FR_SPELLING_RULE` as a rule for unknown words ([PR #47](https://github.com/valentjn/ltex-ls/pull/47) by [Nicolas Sicard (@biozic)](https://github.com/biozic))
- Ignore non-object values for `InitializeParams.initializationOptions` (fixes [#65](https://github.com/valentjn/ltex-ls/issues/65))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ private enum Mode {
private static final Pattern emDashPattern = Pattern.compile("^---");
private static final Pattern enDashPattern = Pattern.compile("^--");
private static final Pattern accentPattern1 = Pattern.compile(
"^(\\\\[`'\\^~\"=\\.])(([A-Za-z]|\\\\i)|(\\{([A-Za-z]|\\\\i)\\}))");
"^(\\\\[`'\\^~\"=\\.])(([A-Za-z]|\\\\i|\\\\j)|(\\{([A-Za-z]|\\\\i|\\\\j)\\}))");
private static final Pattern accentPattern2 = Pattern.compile(
"^(\\\\[cr])( *([A-Za-z])|\\{([A-Za-z])\\})");
"^(\\\\[Hbcdkruv])( *([A-Za-z]|\\\\i|\\\\j)|\\{([A-Za-z]|\\\\i|\\\\j)\\})");
private static final Pattern displayMathPattern = Pattern.compile("^\\$\\$");
private static final Pattern verbCommandPattern = Pattern.compile("^\\\\verb\\*?(.).*?\\1");
private static final Pattern rsweaveBeginPattern = Pattern.compile("^<<.*?>>=");
Expand Down Expand Up @@ -446,15 +446,35 @@ private void processBackslash() {
popMode();
addMarkup(command, generateDummy());
} else if (command.equals("\\AA")) {
// capital A with ring above
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00c5"));
} else if (command.equals("\\L")) {
// capital L with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0141"));
} else if (command.equals("\\O")) {
// capital O with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00d8"));
} else if (command.equals("\\SS")) {
// capital sharp S
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u1e9e"));
} else if (command.equals("\\aa")) {
// small a with ring above
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00e5"));
} else if (command.equals("\\ss")) {
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00df"));
} else if (command.equals("\\i")) {
// small i without dot
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0131"));
} else if (command.equals("\\j")) {
// small j without dot
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0237"));
} else if (command.equals("\\l")) {
// small l with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0142"));
} else if (command.equals("\\o")) {
// small o with stroke
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00f8"));
} else if (command.equals("\\ss")) {
// small sharp s
addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00df"));
} else if (command.equals("\\`") || command.equals("\\'") || command.equals("\\^")
|| command.equals("\\~") || command.equals("\\\"") || command.equals("\\=")
|| command.equals("\\.")) {
Expand All @@ -470,7 +490,9 @@ private void processBackslash() {
} else {
addMarkup(command);
}
} else if (command.equals("\\c") || command.equals("\\r")) {
} else if (command.equals("\\H") || command.equals("\\b") || command.equals("\\c")
|| command.equals("\\d") || command.equals("\\k") || command.equals("\\r")
|| command.equals("\\u") || command.equals("\\v")) {
Matcher matcher = accentPattern2.matcher(this.code.substring(this.pos));

if (!isMathMode(this.curMode) && matcher.find()) {
Expand Down Expand Up @@ -889,6 +911,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
String unicode = "";

switch (accentCommand.charAt(1)) {
// grave
case '`': {
if (letter.equals("A")) unicode = "\u00c0";
else if (letter.equals("E")) unicode = "\u00c8";
Expand All @@ -902,6 +925,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("u")) unicode = "\u00f9";
break;
}
// acute
case '\'': {
if (letter.equals("A")) unicode = "\u00c1";
else if (letter.equals("E")) unicode = "\u00c9";
Expand All @@ -917,6 +941,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("y")) unicode = "\u00fd";
break;
}
// circumflex
case '^': {
if (letter.equals("A")) unicode = "\u00c2";
else if (letter.equals("E")) unicode = "\u00ca";
Expand All @@ -927,11 +952,13 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("a")) unicode = "\u00e2";
else if (letter.equals("e")) unicode = "\u00ea";
else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u00ee";
else if (letter.equals("j") || letter.equals("\\j")) unicode = "\u0135";
else if (letter.equals("o")) unicode = "\u00f4";
else if (letter.equals("u")) unicode = "\u00fb";
else if (letter.equals("y")) unicode = "\u0177";
break;
}
// tilde
case '~': {
if (letter.equals("A")) unicode = "\u00c3";
else if (letter.equals("E")) unicode = "\u1ebc";
Expand All @@ -947,6 +974,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("u")) unicode = "\u0169";
break;
}
// diaeresis/umlaut
case '"': {
if (letter.equals("A")) unicode = "\u00c4";
else if (letter.equals("E")) unicode = "\u00cb";
Expand All @@ -962,6 +990,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("y")) unicode = "\u00ff";
break;
}
// macron
case '=': {
if (letter.equals("A")) unicode = "\u0100";
else if (letter.equals("E")) unicode = "\u0112";
Expand All @@ -977,6 +1006,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("y")) unicode = "\u0233";
break;
}
// dot above
case '.': {
if (letter.equals("A")) unicode = "\u0226";
else if (letter.equals("E")) unicode = "\u0116";
Expand All @@ -987,18 +1017,139 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter
else if (letter.equals("o")) unicode = "\u022f";
break;
}
// double acute
case 'H': {
if (letter.equals("O")) unicode = "\u0150";
else if (letter.equals("U")) unicode = "\u0170";
else if (letter.equals("o")) unicode = "\u0151";
else if (letter.equals("u")) unicode = "\u0171";
break;
}
// line below
case 'b': {
if (letter.equals("B")) unicode = "\u1e06";
else if (letter.equals("D")) unicode = "\u1e0e";
else if (letter.equals("K")) unicode = "\u1e34";
else if (letter.equals("L")) unicode = "\u1e3a";
else if (letter.equals("N")) unicode = "\u1e48";
else if (letter.equals("R")) unicode = "\u1e5e";
else if (letter.equals("T")) unicode = "\u1e6e";
else if (letter.equals("Z")) unicode = "\u1e94";
else if (letter.equals("b")) unicode = "\u1e07";
else if (letter.equals("d")) unicode = "\u1e0f";
else if (letter.equals("h")) unicode = "\u1e96";
else if (letter.equals("k")) unicode = "\u1e35";
else if (letter.equals("l")) unicode = "\u1e3b";
else if (letter.equals("n")) unicode = "\u1e49";
else if (letter.equals("r")) unicode = "\u1e5f";
else if (letter.equals("t")) unicode = "\u1e6f";
else if (letter.equals("z")) unicode = "\u1e95";
break;
}
// cedilla
case 'c': {
if (letter.equals("C")) unicode = "\u00c7";
else if (letter.equals("D")) unicode = "\u1e10";
else if (letter.equals("E")) unicode = "\u0228";
else if (letter.equals("G")) unicode = "\u0122";
else if (letter.equals("H")) unicode = "\u1e28";
else if (letter.equals("K")) unicode = "\u0136";
else if (letter.equals("L")) unicode = "\u013b";
else if (letter.equals("N")) unicode = "\u0145";
else if (letter.equals("R")) unicode = "\u0156";
else if (letter.equals("S")) unicode = "\u015e";
else if (letter.equals("T")) unicode = "\u0162";
else if (letter.equals("c")) unicode = "\u00e7";
else if (letter.equals("d")) unicode = "\u1e11";
else if (letter.equals("e")) unicode = "\u0229";
else if (letter.equals("g")) unicode = "\u0123";
else if (letter.equals("h")) unicode = "\u1e29";
else if (letter.equals("k")) unicode = "\u0137";
else if (letter.equals("l")) unicode = "\u013c";
else if (letter.equals("n")) unicode = "\u0146";
else if (letter.equals("r")) unicode = "\u0157";
else if (letter.equals("s")) unicode = "\u015f";
else if (letter.equals("t")) unicode = "\u0163";
break;
}
// dot below
case 'd': {
if (letter.equals("A")) unicode = "\u1ea0";
else if (letter.equals("E")) unicode = "\u1eb8";
else if (letter.equals("I")) unicode = "\u1eca";
else if (letter.equals("O")) unicode = "\u1ecc";
else if (letter.equals("U")) unicode = "\u1ee4";
else if (letter.equals("Y")) unicode = "\u1ef4";
else if (letter.equals("a")) unicode = "\u1ea1";
else if (letter.equals("e")) unicode = "\u1eb9";
else if (letter.equals("i")) unicode = "\u1ecb";
else if (letter.equals("o")) unicode = "\u1ecd";
else if (letter.equals("u")) unicode = "\u1ee5";
else if (letter.equals("y")) unicode = "\u1ef5";
break;
}
// ogonek
case 'k': {
if (letter.equals("A")) unicode = "\u0104";
else if (letter.equals("E")) unicode = "\u0118";
else if (letter.equals("I")) unicode = "\u012e";
else if (letter.equals("O")) unicode = "\u01ea";
else if (letter.equals("U")) unicode = "\u0172";
else if (letter.equals("a")) unicode = "\u0105";
else if (letter.equals("e")) unicode = "\u0119";
else if (letter.equals("i")) unicode = "\u012f";
else if (letter.equals("o")) unicode = "\u01eb";
else if (letter.equals("u")) unicode = "\u0173";
break;
}
// ring above
case 'r': {
if (letter.equals("A")) unicode = "\u00c5";
else if (letter.equals("U")) unicode = "\u016e";
else if (letter.equals("a")) unicode = "\u00e5";
else if (letter.equals("u")) unicode = "\u016f";
break;
}
// breve
case 'u': {
if (letter.equals("A")) unicode = "\u0102";
else if (letter.equals("E")) unicode = "\u0114";
else if (letter.equals("G")) unicode = "\u011e";
else if (letter.equals("I")) unicode = "\u012c";
else if (letter.equals("O")) unicode = "\u014e";
else if (letter.equals("U")) unicode = "\u016c";
else if (letter.equals("a")) unicode = "\u0103";
else if (letter.equals("e")) unicode = "\u0115";
else if (letter.equals("g")) unicode = "\u011f";
else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u012d";
else if (letter.equals("o")) unicode = "\u014f";
else if (letter.equals("u")) unicode = "\u016d";
break;
}
// caron
case 'v': {
if (letter.equals("C")) unicode = "\u010c";
else if (letter.equals("D")) unicode = "\u010e";
else if (letter.equals("E")) unicode = "\u011a";
else if (letter.equals("L")) unicode = "\u013d";
else if (letter.equals("N")) unicode = "\u0147";
else if (letter.equals("R")) unicode = "\u0158";
else if (letter.equals("S")) unicode = "\u0160";
else if (letter.equals("T")) unicode = "\u0164";
else if (letter.equals("Z")) unicode = "\u017d";
else if (letter.equals("c")) unicode = "\u010d";
else if (letter.equals("d")) unicode = "\u010f";
else if (letter.equals("e")) unicode = "\u011b";
else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u01d0";
else if (letter.equals("j") || letter.equals("\\j")) unicode = "\u01f0";
else if (letter.equals("l")) unicode = "\u013e";
else if (letter.equals("n")) unicode = "\u0148";
else if (letter.equals("r")) unicode = "\u0159";
else if (letter.equals("s")) unicode = "\u0161";
else if (letter.equals("t")) unicode = "\u0165";
else if (letter.equals("z")) unicode = "\u017e";
break;
}
default: {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,25 +91,98 @@ public void testTextMode() {
"\u00cb\u00efn T\u00ebxt m\u00eft v\u00ef\u00ebl\u00ebn "
+ "\u00dcml\u00e4\u00fct\u00ebn. ");
assertPlainText(
"\\AA\\O\\aa\\ss\\o"
+ "\\`A\\`E\\`I\\`O\\`U\\`a\\`e\\`i\\`\\i\\`o\\`u"
+ "\\'A\\'E\\'I\\'O\\'U\\'Y\\'a\\'e\\'i\\'\\i\\'o\\'u\\'y"
+ "\\^A\\^E\\^I\\^O\\^U\\^Y\\^a\\^e\\^i\\^\\i\\^o\\^u\\^y"
+ "\\~A\\~E\\~I\\~N\\~O\\~U\\~a\\~e\\~i\\~\\i\\~n\\~o\\~u"
+ "\\\"A\\\"E\\\"I\\\"O\\\"U\\\"Y\\\"a\\\"e\\\"i\\\"\\i\\\"o\\\"u\\\"y"
+ "\\=A\\=E\\=I\\=O\\=U\\=Y\\=a\\=e\\=i\\=\\i\\=o\\=u\\=y"
+ "\\.A\\.E\\.I\\.O\\.a\\.e\\.o",
"\u00c5\u00d8\u00e5\u00df\u00f8"
+ "\u00c0\u00c8\u00cc\u00d2\u00d9\u00e0\u00e8\u00ec\u00ec\u00f2\u00f9"
+ "\u00c1\u00c9\u00cd\u00d3\u00da\u00dd\u00e1\u00e9\u00ed\u00ed\u00f3\u00fa\u00fd"
+ "\u00c2\u00ca\u00ce\u00d4\u00db\u0176\u00e2\u00ea\u00ee\u00ee\u00f4\u00fb\u0177"
+ "\u00c3\u1ebc\u0128\u00d1\u00d5\u0168\u00e3\u1ebd\u0129\u0129\u00f1\u00f5\u0169"
+ "\u00c4\u00cb\u00cf\u00d6\u00dc\u0178\u00e4\u00eb\u00ef\u00ef\u00f6\u00fc\u00ff"
+ "\u0100\u0112\u012a\u014c\u016a\u0232\u0101\u0113\u012b\u012b\u014d\u016b\u0233"
+ "\u0226\u0116\u0130\u022e\u0227\u0117\u022f");
assertPlainText(
"\\c{C}\\c c\\r{A}\\r U\\r a\\r u",
"\u00c7\u00e7\u00c5\u016e\u00e5\u016f");
"\\AA\\L\\O\\SS",
"\u00c5\u0141\u00d8\u1e9e");
assertPlainText(
"\\aa\\i\\j\\l\\o\\ss",
"\u00e5\u0131\u0237\u0142\u00f8\u00df");
assertPlainText(
"\\`A\\`E\\`I\\`O\\`U",
"\u00c0\u00c8\u00cc\u00d2\u00d9");
assertPlainText(
"\\`a\\`e\\`i\\`\\i\\`o\\`u",
"\u00e0\u00e8\u00ec\u00ec\u00f2\u00f9");
assertPlainText(
"\\'A\\'E\\'I\\'O\\'U\\'Y",
"\u00c1\u00c9\u00cd\u00d3\u00da\u00dd");
assertPlainText(
"\\'a\\'e\\'i\\'\\i\\'o\\'u\\'y",
"\u00e1\u00e9\u00ed\u00ed\u00f3\u00fa\u00fd");
assertPlainText(
"\\^A\\^E\\^I\\^O\\^U\\^Y",
"\u00c2\u00ca\u00ce\u00d4\u00db\u0176");
assertPlainText(
"\\^a\\^e\\^i\\^\\i\\^j\\^\\j\\^o\\^u\\^y",
"\u00e2\u00ea\u00ee\u00ee\u0135\u0135\u00f4\u00fb\u0177");
assertPlainText(
"\\~A\\~E\\~I\\~N\\~O\\~U",
"\u00c3\u1ebc\u0128\u00d1\u00d5\u0168");
assertPlainText(
"\\~a\\~e\\~i\\~\\i\\~n\\~o\\~u",
"\u00e3\u1ebd\u0129\u0129\u00f1\u00f5\u0169");
assertPlainText(
"\\\"A\\\"E\\\"I\\\"O\\\"U\\\"Y",
"\u00c4\u00cb\u00cf\u00d6\u00dc\u0178");
assertPlainText(
"\\\"a\\\"e\\\"i\\\"\\i\\\"o\\\"u\\\"y",
"\u00e4\u00eb\u00ef\u00ef\u00f6\u00fc\u00ff");
assertPlainText(
"\\=A\\=E\\=I\\=O\\=U\\=Y",
"\u0100\u0112\u012a\u014c\u016a\u0232");
assertPlainText(
"\\=a\\=e\\=i\\=\\i\\=o\\=u\\=y",
"\u0101\u0113\u012b\u012b\u014d\u016b\u0233");
assertPlainText(
"\\.A\\.E\\.I\\.O",
"\u0226\u0116\u0130\u022e");
assertPlainText(
"\\.a\\.e\\.o",
"\u0227\u0117\u022f");
assertPlainText(
"\\H{O}\\H{U}\\H{o}\\H{u}",
"\u0150\u0170\u0151\u0171");
assertPlainText(
"\\b{B}\\b{D}\\b{K}\\b{L}\\b{N}\\b{R}\\b{T}\\b{Z}",
"\u1e06\u1e0e\u1e34\u1e3a\u1e48\u1e5e\u1e6e\u1e94");
assertPlainText(
"\\b{b}\\b{d}\\b{h}\\b{k}\\b{l}\\b{n}\\b{r}\\b{t}\\b{z}",
"\u1e07\u1e0f\u1e96\u1e35\u1e3b\u1e49\u1e5f\u1e6f\u1e95");
assertPlainText(
"\\c{C}\\c{D}\\c{E}\\c{G}\\c{H}\\c{K}\\c{L}\\c{N}\\c{R}\\c{S}\\c{T}",
"\u00c7\u1e10\u0228\u0122\u1e28\u0136\u013b\u0145\u0156\u015e\u0162");
assertPlainText(
"\\c{c}\\c{d}\\c{e}\\c{g}\\c{h}\\c{k}\\c{l}\\c{n}\\c{r}\\c{s}\\c{t}",
"\u00e7\u1e11\u0229\u0123\u1e29\u0137\u013c\u0146\u0157\u015f\u0163");
assertPlainText(
"\\d{A}\\d{E}\\d{I}\\d{O}\\d{U}\\d{Y}",
"\u1ea0\u1eb8\u1eca\u1ecc\u1ee4\u1ef4");
assertPlainText(
"\\d{a}\\d{e}\\d{i}\\d{o}\\d{u}\\d{y}",
"\u1ea1\u1eb9\u1ecb\u1ecd\u1ee5\u1ef5");
assertPlainText(
"\\k{A}\\k{E}\\k{I}\\k{O}\\k{U}",
"\u0104\u0118\u012e\u01ea\u0172");
assertPlainText(
"\\k{a}\\k{e}\\k{i}\\k{o}\\k{u}",
"\u0105\u0119\u012f\u01eb\u0173");
assertPlainText(
"\\r{A}\\r{U}",
"\u00c5\u016e");
assertPlainText(
"\\r{a}\\r{u}",
"\u00e5\u016f");
assertPlainText(
"\\u{A}\\u{E}\\u{G}\\u{I}\\u{O}\\u{U}",
"\u0102\u0114\u011e\u012c\u014e\u016c");
assertPlainText(
"\\v{C}\\v{D}\\v{E}\\v{L}\\v{N}\\v{R}\\v{S}\\v{T}\\v{Z}",
"\u010c\u010e\u011a\u013d\u0147\u0158\u0160\u0164\u017d");
assertPlainText(
"\\v{c}\\v{d}\\v{e}\\v{i}\\v{\\i}\\v{j}\\v{\\j}\\v{l}\\v{n}\\v{r}\\v{s}\\v{t}\\v{z}",
"\u010d\u010f\u011b\u01d0\u01d0\u01f0\u01f0\u013e\u0148\u0159\u0161\u0165\u017e");
assertPlainText(
"\\u{a}\\u{e}\\u{g}\\u{i}\\u{\\i}\\u{o}\\u{u}",
"\u0103\u0115\u011f\u012d\u012d\u014f\u016d");
assertPlainText(
"This is a test: a, b, \\dots, c.\n",
"This is a test: a, b, \u2026, c. ");
Expand Down

0 comments on commit 90d7e8f

Please sign in to comment.