diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b80d567..60e27e2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ - Update LanguageTool to 5.3 (see [LT 5.3 release notes](https://github.com/languagetool-org/languagetool/blob/v5.3/languagetool-standalone/CHANGES.md#53-2021-03-29)) - Update LSP4J to 0.12.0 +- Add support for many LaTeX accents and enhance coverage of existing accents: double acute (e.g., `\H{O}`, Ő), line below (e.g., `\b{h}`, ẖ), cedilla (e.g., `\c{E}`, Ȩ), dot below (e.g., `\d{A}`, Ạ), ogonek (e.g., `\k{A}`, Ą), breve (e.g., `\u{A}`, Ă), and caron (e.g., `\v{C}`, Č) ([PR #57](https://github.com/valentjn/ltex-ls/pull/57) by [@ed359](https://github.com/ed359), fixes [#56](https://github.com/valentjn/ltex-ls/issues/56)) +- Add support for special LaTeX characters: `\L` (Ł), `\SS` (ẞ), `\i` (ı), `\j` (ȷ), `\l` (ł) ([PR #57](https://github.com/valentjn/ltex-ls/pull/57) by [@ed359](https://github.com/ed359), fixes [#56](https://github.com/valentjn/ltex-ls/issues/56)) - Add `FR_SPELLING_RULE` as a rule for unknown words ([PR #47](https://github.com/valentjn/ltex-ls/pull/47) by [Nicolas Sicard (@biozic)](https://github.com/biozic)) - Ignore non-object values for `InitializeParams.initializationOptions` (fixes [#65](https://github.com/valentjn/ltex-ls/issues/65)) diff --git a/ltexls-core/src/main/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilder.java b/ltexls-core/src/main/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilder.java index ca94f0f7..5f8ac5c4 100644 --- a/ltexls-core/src/main/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilder.java +++ b/ltexls-core/src/main/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilder.java @@ -59,9 +59,9 @@ private enum Mode { private static final Pattern emDashPattern = Pattern.compile("^---"); private static final Pattern enDashPattern = Pattern.compile("^--"); private static final Pattern accentPattern1 = Pattern.compile( - 
"^(\\\\[`'\\^~\"=\\.])(([A-Za-z]|\\\\i)|(\\{([A-Za-z]|\\\\i)\\}))"); + "^(\\\\[`'\\^~\"=\\.])(([A-Za-z]|\\\\i|\\\\j)|(\\{([A-Za-z]|\\\\i|\\\\j)\\}))"); private static final Pattern accentPattern2 = Pattern.compile( - "^(\\\\[cr])( *([A-Za-z])|\\{([A-Za-z])\\})"); + "^(\\\\[Hbcdkruv])( *([A-Za-z]|\\\\i|\\\\j)|\\{([A-Za-z]|\\\\i|\\\\j)\\})"); private static final Pattern displayMathPattern = Pattern.compile("^\\$\\$"); private static final Pattern verbCommandPattern = Pattern.compile("^\\\\verb\\*?(.).*?\\1"); private static final Pattern rsweaveBeginPattern = Pattern.compile("^<<.*?>>="); @@ -446,15 +446,35 @@ private void processBackslash() { popMode(); addMarkup(command, generateDummy()); } else if (command.equals("\\AA")) { + // capital A with ring above addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00c5")); + } else if (command.equals("\\L")) { + // capital L with stroke + addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0141")); } else if (command.equals("\\O")) { + // capital O with stroke addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00d8")); + } else if (command.equals("\\SS")) { + // capital sharp S + addMarkup(command, (isMathMode(this.curMode) ? "" : "\u1e9e")); } else if (command.equals("\\aa")) { + // small a with ring above addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00e5")); - } else if (command.equals("\\ss")) { - addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00df")); + } else if (command.equals("\\i")) { + // small i without dot + addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0131")); + } else if (command.equals("\\j")) { + // small j without dot + addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0237")); + } else if (command.equals("\\l")) { + // small l with stroke + addMarkup(command, (isMathMode(this.curMode) ? "" : "\u0142")); } else if (command.equals("\\o")) { + // small o with stroke addMarkup(command, (isMathMode(this.curMode) ? 
"" : "\u00f8")); + } else if (command.equals("\\ss")) { + // small sharp s + addMarkup(command, (isMathMode(this.curMode) ? "" : "\u00df")); } else if (command.equals("\\`") || command.equals("\\'") || command.equals("\\^") || command.equals("\\~") || command.equals("\\\"") || command.equals("\\=") || command.equals("\\.")) { @@ -470,7 +490,9 @@ private void processBackslash() { } else { addMarkup(command); } - } else if (command.equals("\\c") || command.equals("\\r")) { + } else if (command.equals("\\H") || command.equals("\\b") || command.equals("\\c") + || command.equals("\\d") || command.equals("\\k") || command.equals("\\r") + || command.equals("\\u") || command.equals("\\v")) { Matcher matcher = accentPattern2.matcher(this.code.substring(this.pos)); if (!isMathMode(this.curMode) && matcher.find()) { @@ -889,6 +911,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter String unicode = ""; switch (accentCommand.charAt(1)) { + // grave case '`': { if (letter.equals("A")) unicode = "\u00c0"; else if (letter.equals("E")) unicode = "\u00c8"; @@ -902,6 +925,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("u")) unicode = "\u00f9"; break; } + // acute case '\'': { if (letter.equals("A")) unicode = "\u00c1"; else if (letter.equals("E")) unicode = "\u00c9"; @@ -917,6 +941,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("y")) unicode = "\u00fd"; break; } + // circumflex case '^': { if (letter.equals("A")) unicode = "\u00c2"; else if (letter.equals("E")) unicode = "\u00ca"; @@ -927,11 +952,13 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("a")) unicode = "\u00e2"; else if (letter.equals("e")) unicode = "\u00ea"; else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u00ee"; + else if (letter.equals("j") || letter.equals("\\j")) unicode = "\u0135"; else 
if (letter.equals("o")) unicode = "\u00f4"; else if (letter.equals("u")) unicode = "\u00fb"; else if (letter.equals("y")) unicode = "\u0177"; break; } + // tilde case '~': { if (letter.equals("A")) unicode = "\u00c3"; else if (letter.equals("E")) unicode = "\u1ebc"; @@ -947,6 +974,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("u")) unicode = "\u0169"; break; } + // diaeresis/umlaut case '"': { if (letter.equals("A")) unicode = "\u00c4"; else if (letter.equals("E")) unicode = "\u00cb"; @@ -962,6 +990,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("y")) unicode = "\u00ff"; break; } + // macron case '=': { if (letter.equals("A")) unicode = "\u0100"; else if (letter.equals("E")) unicode = "\u0112"; @@ -977,6 +1006,7 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("y")) unicode = "\u0233"; break; } + // dot above case '.': { if (letter.equals("A")) unicode = "\u0226"; else if (letter.equals("E")) unicode = "\u0116"; @@ -987,11 +1017,92 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("o")) unicode = "\u022f"; break; } + // double acute + case 'H': { + if (letter.equals("O")) unicode = "\u0150"; + else if (letter.equals("U")) unicode = "\u0170"; + else if (letter.equals("o")) unicode = "\u0151"; + else if (letter.equals("u")) unicode = "\u0171"; + break; + } + // line below + case 'b': { + if (letter.equals("B")) unicode = "\u1e06"; + else if (letter.equals("D")) unicode = "\u1e0e"; + else if (letter.equals("K")) unicode = "\u1e34"; + else if (letter.equals("L")) unicode = "\u1e3a"; + else if (letter.equals("N")) unicode = "\u1e48"; + else if (letter.equals("R")) unicode = "\u1e5e"; + else if (letter.equals("T")) unicode = "\u1e6e"; + else if (letter.equals("Z")) unicode = "\u1e94"; + else if (letter.equals("b")) unicode = "\u1e07"; 
+ else if (letter.equals("d")) unicode = "\u1e0f"; + else if (letter.equals("h")) unicode = "\u1e96"; + else if (letter.equals("k")) unicode = "\u1e35"; + else if (letter.equals("l")) unicode = "\u1e3b"; + else if (letter.equals("n")) unicode = "\u1e49"; + else if (letter.equals("r")) unicode = "\u1e5f"; + else if (letter.equals("t")) unicode = "\u1e6f"; + else if (letter.equals("z")) unicode = "\u1e95"; + break; + } + // cedilla case 'c': { if (letter.equals("C")) unicode = "\u00c7"; + else if (letter.equals("D")) unicode = "\u1e10"; + else if (letter.equals("E")) unicode = "\u0228"; + else if (letter.equals("G")) unicode = "\u0122"; + else if (letter.equals("H")) unicode = "\u1e28"; + else if (letter.equals("K")) unicode = "\u0136"; + else if (letter.equals("L")) unicode = "\u013b"; + else if (letter.equals("N")) unicode = "\u0145"; + else if (letter.equals("R")) unicode = "\u0156"; + else if (letter.equals("S")) unicode = "\u015e"; + else if (letter.equals("T")) unicode = "\u0162"; else if (letter.equals("c")) unicode = "\u00e7"; + else if (letter.equals("d")) unicode = "\u1e11"; + else if (letter.equals("e")) unicode = "\u0229"; + else if (letter.equals("g")) unicode = "\u0123"; + else if (letter.equals("h")) unicode = "\u1e29"; + else if (letter.equals("k")) unicode = "\u0137"; + else if (letter.equals("l")) unicode = "\u013c"; + else if (letter.equals("n")) unicode = "\u0146"; + else if (letter.equals("r")) unicode = "\u0157"; + else if (letter.equals("s")) unicode = "\u015f"; + else if (letter.equals("t")) unicode = "\u0163"; break; } + // dot below + case 'd': { + if (letter.equals("A")) unicode = "\u1ea0"; + else if (letter.equals("E")) unicode = "\u1eb8"; + else if (letter.equals("I")) unicode = "\u1eca"; + else if (letter.equals("O")) unicode = "\u1ecc"; + else if (letter.equals("U")) unicode = "\u1ee4"; + else if (letter.equals("Y")) unicode = "\u1ef4"; + else if (letter.equals("a")) unicode = "\u1ea1"; + else if (letter.equals("e")) unicode = "\u1eb9"; 
+ else if (letter.equals("i")) unicode = "\u1ecb"; + else if (letter.equals("o")) unicode = "\u1ecd"; + else if (letter.equals("u")) unicode = "\u1ee5"; + else if (letter.equals("y")) unicode = "\u1ef5"; + break; + } + // ogonek + case 'k': { + if (letter.equals("A")) unicode = "\u0104"; + else if (letter.equals("E")) unicode = "\u0118"; + else if (letter.equals("I")) unicode = "\u012e"; + else if (letter.equals("O")) unicode = "\u01ea"; + else if (letter.equals("U")) unicode = "\u0172"; + else if (letter.equals("a")) unicode = "\u0105"; + else if (letter.equals("e")) unicode = "\u0119"; + else if (letter.equals("i")) unicode = "\u012f"; + else if (letter.equals("o")) unicode = "\u01eb"; + else if (letter.equals("u")) unicode = "\u0173"; + break; + } + // ring above case 'r': { if (letter.equals("A")) unicode = "\u00c5"; else if (letter.equals("U")) unicode = "\u016e"; @@ -999,6 +1110,46 @@ private String convertAccentCommandToUnicode(String accentCommand, String letter else if (letter.equals("u")) unicode = "\u016f"; break; } + // breve + case 'u': { + if (letter.equals("A")) unicode = "\u0102"; + else if (letter.equals("E")) unicode = "\u0114"; + else if (letter.equals("G")) unicode = "\u011e"; + else if (letter.equals("I")) unicode = "\u012c"; + else if (letter.equals("O")) unicode = "\u014e"; + else if (letter.equals("U")) unicode = "\u016c"; + else if (letter.equals("a")) unicode = "\u0103"; + else if (letter.equals("e")) unicode = "\u0115"; + else if (letter.equals("g")) unicode = "\u011f"; + else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u012d"; + else if (letter.equals("o")) unicode = "\u014f"; + else if (letter.equals("u")) unicode = "\u016d"; + break; + } + // caron + case 'v': { + if (letter.equals("C")) unicode = "\u010c"; + else if (letter.equals("D")) unicode = "\u010e"; + else if (letter.equals("E")) unicode = "\u011a"; + else if (letter.equals("L")) unicode = "\u013d"; + else if (letter.equals("N")) unicode = "\u0147"; + else if 
(letter.equals("R")) unicode = "\u0158"; + else if (letter.equals("S")) unicode = "\u0160"; + else if (letter.equals("T")) unicode = "\u0164"; + else if (letter.equals("Z")) unicode = "\u017d"; + else if (letter.equals("c")) unicode = "\u010d"; + else if (letter.equals("d")) unicode = "\u010f"; + else if (letter.equals("e")) unicode = "\u011b"; + else if (letter.equals("i") || letter.equals("\\i")) unicode = "\u01d0"; + else if (letter.equals("j") || letter.equals("\\j")) unicode = "\u01f0"; + else if (letter.equals("l")) unicode = "\u013e"; + else if (letter.equals("n")) unicode = "\u0148"; + else if (letter.equals("r")) unicode = "\u0159"; + else if (letter.equals("s")) unicode = "\u0161"; + else if (letter.equals("t")) unicode = "\u0165"; + else if (letter.equals("z")) unicode = "\u017e"; + break; + } default: { break; } diff --git a/ltexls-core/src/test/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilderTest.java b/ltexls-core/src/test/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilderTest.java index e6348ad9..e63a7e0a 100644 --- a/ltexls-core/src/test/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilderTest.java +++ b/ltexls-core/src/test/java/org/bsplines/ltexls/parsing/latex/LatexAnnotatedTextBuilderTest.java @@ -91,25 +91,98 @@ public void testTextMode() { "\u00cb\u00efn T\u00ebxt m\u00eft v\u00ef\u00ebl\u00ebn " + "\u00dcml\u00e4\u00fct\u00ebn. 
"); assertPlainText( - "\\AA\\O\\aa\\ss\\o" - + "\\`A\\`E\\`I\\`O\\`U\\`a\\`e\\`i\\`\\i\\`o\\`u" - + "\\'A\\'E\\'I\\'O\\'U\\'Y\\'a\\'e\\'i\\'\\i\\'o\\'u\\'y" - + "\\^A\\^E\\^I\\^O\\^U\\^Y\\^a\\^e\\^i\\^\\i\\^o\\^u\\^y" - + "\\~A\\~E\\~I\\~N\\~O\\~U\\~a\\~e\\~i\\~\\i\\~n\\~o\\~u" - + "\\\"A\\\"E\\\"I\\\"O\\\"U\\\"Y\\\"a\\\"e\\\"i\\\"\\i\\\"o\\\"u\\\"y" - + "\\=A\\=E\\=I\\=O\\=U\\=Y\\=a\\=e\\=i\\=\\i\\=o\\=u\\=y" - + "\\.A\\.E\\.I\\.O\\.a\\.e\\.o", - "\u00c5\u00d8\u00e5\u00df\u00f8" - + "\u00c0\u00c8\u00cc\u00d2\u00d9\u00e0\u00e8\u00ec\u00ec\u00f2\u00f9" - + "\u00c1\u00c9\u00cd\u00d3\u00da\u00dd\u00e1\u00e9\u00ed\u00ed\u00f3\u00fa\u00fd" - + "\u00c2\u00ca\u00ce\u00d4\u00db\u0176\u00e2\u00ea\u00ee\u00ee\u00f4\u00fb\u0177" - + "\u00c3\u1ebc\u0128\u00d1\u00d5\u0168\u00e3\u1ebd\u0129\u0129\u00f1\u00f5\u0169" - + "\u00c4\u00cb\u00cf\u00d6\u00dc\u0178\u00e4\u00eb\u00ef\u00ef\u00f6\u00fc\u00ff" - + "\u0100\u0112\u012a\u014c\u016a\u0232\u0101\u0113\u012b\u012b\u014d\u016b\u0233" - + "\u0226\u0116\u0130\u022e\u0227\u0117\u022f"); - assertPlainText( - "\\c{C}\\c c\\r{A}\\r U\\r a\\r u", - "\u00c7\u00e7\u00c5\u016e\u00e5\u016f"); + "\\AA\\L\\O\\SS", + "\u00c5\u0141\u00d8\u1e9e"); + assertPlainText( + "\\aa\\i\\j\\l\\o\\ss", + "\u00e5\u0131\u0237\u0142\u00f8\u00df"); + assertPlainText( + "\\`A\\`E\\`I\\`O\\`U", + "\u00c0\u00c8\u00cc\u00d2\u00d9"); + assertPlainText( + "\\`a\\`e\\`i\\`\\i\\`o\\`u", + "\u00e0\u00e8\u00ec\u00ec\u00f2\u00f9"); + assertPlainText( + "\\'A\\'E\\'I\\'O\\'U\\'Y", + "\u00c1\u00c9\u00cd\u00d3\u00da\u00dd"); + assertPlainText( + "\\'a\\'e\\'i\\'\\i\\'o\\'u\\'y", + "\u00e1\u00e9\u00ed\u00ed\u00f3\u00fa\u00fd"); + assertPlainText( + "\\^A\\^E\\^I\\^O\\^U\\^Y", + "\u00c2\u00ca\u00ce\u00d4\u00db\u0176"); + assertPlainText( + "\\^a\\^e\\^i\\^\\i\\^j\\^\\j\\^o\\^u\\^y", + "\u00e2\u00ea\u00ee\u00ee\u0135\u0135\u00f4\u00fb\u0177"); + assertPlainText( + "\\~A\\~E\\~I\\~N\\~O\\~U", + "\u00c3\u1ebc\u0128\u00d1\u00d5\u0168"); + assertPlainText( + 
"\\~a\\~e\\~i\\~\\i\\~n\\~o\\~u", + "\u00e3\u1ebd\u0129\u0129\u00f1\u00f5\u0169"); + assertPlainText( + "\\\"A\\\"E\\\"I\\\"O\\\"U\\\"Y", + "\u00c4\u00cb\u00cf\u00d6\u00dc\u0178"); + assertPlainText( + "\\\"a\\\"e\\\"i\\\"\\i\\\"o\\\"u\\\"y", + "\u00e4\u00eb\u00ef\u00ef\u00f6\u00fc\u00ff"); + assertPlainText( + "\\=A\\=E\\=I\\=O\\=U\\=Y", + "\u0100\u0112\u012a\u014c\u016a\u0232"); + assertPlainText( + "\\=a\\=e\\=i\\=\\i\\=o\\=u\\=y", + "\u0101\u0113\u012b\u012b\u014d\u016b\u0233"); + assertPlainText( + "\\.A\\.E\\.I\\.O", + "\u0226\u0116\u0130\u022e"); + assertPlainText( + "\\.a\\.e\\.o", + "\u0227\u0117\u022f"); + assertPlainText( + "\\H{O}\\H{U}\\H{o}\\H{u}", + "\u0150\u0170\u0151\u0171"); + assertPlainText( + "\\b{B}\\b{D}\\b{K}\\b{L}\\b{N}\\b{R}\\b{T}\\b{Z}", + "\u1e06\u1e0e\u1e34\u1e3a\u1e48\u1e5e\u1e6e\u1e94"); + assertPlainText( + "\\b{b}\\b{d}\\b{h}\\b{k}\\b{l}\\b{n}\\b{r}\\b{t}\\b{z}", + "\u1e07\u1e0f\u1e96\u1e35\u1e3b\u1e49\u1e5f\u1e6f\u1e95"); + assertPlainText( + "\\c{C}\\c{D}\\c{E}\\c{G}\\c{H}\\c{K}\\c{L}\\c{N}\\c{R}\\c{S}\\c{T}", + "\u00c7\u1e10\u0228\u0122\u1e28\u0136\u013b\u0145\u0156\u015e\u0162"); + assertPlainText( + "\\c{c}\\c{d}\\c{e}\\c{g}\\c{h}\\c{k}\\c{l}\\c{n}\\c{r}\\c{s}\\c{t}", + "\u00e7\u1e11\u0229\u0123\u1e29\u0137\u013c\u0146\u0157\u015f\u0163"); + assertPlainText( + "\\d{A}\\d{E}\\d{I}\\d{O}\\d{U}\\d{Y}", + "\u1ea0\u1eb8\u1eca\u1ecc\u1ee4\u1ef4"); + assertPlainText( + "\\d{a}\\d{e}\\d{i}\\d{o}\\d{u}\\d{y}", + "\u1ea1\u1eb9\u1ecb\u1ecd\u1ee5\u1ef5"); + assertPlainText( + "\\k{A}\\k{E}\\k{I}\\k{O}\\k{U}", + "\u0104\u0118\u012e\u01ea\u0172"); + assertPlainText( + "\\k{a}\\k{e}\\k{i}\\k{o}\\k{u}", + "\u0105\u0119\u012f\u01eb\u0173"); + assertPlainText( + "\\r{A}\\r{U}", + "\u00c5\u016e"); + assertPlainText( + "\\r{a}\\r{u}", + "\u00e5\u016f"); + assertPlainText( + "\\u{A}\\u{E}\\u{G}\\u{I}\\u{O}\\u{U}", + "\u0102\u0114\u011e\u012c\u014e\u016c"); + assertPlainText( + "\\v{C}\\v{D}\\v{E}\\v{L}\\v{N}\\v{R}\\v{S}\\v{T}\\v{Z}", + 
"\u010c\u010e\u011a\u013d\u0147\u0158\u0160\u0164\u017d"); + assertPlainText( + "\\v{c}\\v{d}\\v{e}\\v{i}\\v{\\i}\\v{j}\\v{\\j}\\v{l}\\v{n}\\v{r}\\v{s}\\v{t}\\v{z}", + "\u010d\u010f\u011b\u01d0\u01d0\u01f0\u01f0\u013e\u0148\u0159\u0161\u0165\u017e"); + assertPlainText( + "\\u{a}\\u{e}\\u{g}\\u{i}\\u{\\i}\\u{o}\\u{u}", + "\u0103\u0115\u011f\u012d\u012d\u014f\u016d"); assertPlainText( "This is a test: a, b, \\dots, c.\n", "This is a test: a, b, \u2026, c. ");