|
20 | 20 |
|
21 | 21 | namespace tesseract {
|
22 | 22 |
|
23 |
| -const char *kUTF8LineSeparator = "\u2028"; // "\xe2\x80\xa8"; |
24 |
| -const char *kUTF8ParagraphSeparator = "\u2029"; // "\xe2\x80\xa9"; |
25 |
| -const char *kLRM = "\u200E"; // Left-to-Right Mark |
26 |
| -const char *kRLM = "\u200F"; // Right-to-Left Mark |
27 |
| -const char *kRLE = "\u202A"; // Right-to-Left Embedding |
28 |
| -const char *kPDF = "\u202C"; // Pop Directional Formatting |
29 |
| - |
30 |
| -const char *kHyphenLikeUTF8[] = { |
31 |
| - "-", // ASCII hyphen-minus |
32 |
| - "\u05BE", // word hyphen in Hebrew |
33 |
| - "\u2010", // hyphen |
34 |
| - "\u2011", // non-breaking hyphen |
35 |
| - "\u2012", // a hyphen the same width as digits |
36 |
| - "\u2013", // en dash |
37 |
| - "\u2014", // em dash |
38 |
| - "\u2015", // horizontal bar |
39 |
| - "\u2212", // arithmetic minus sign |
40 |
| - "\uFE58", // small em dash |
41 |
| - "\uFE63", // small hyphen-minus |
42 |
| - "\uFF0D", // fullwidth hyphen-minus |
43 |
| - nullptr, // end of our list |
44 |
| -}; |
45 |
| - |
46 |
| -const char *kApostropheLikeUTF8[] = { |
47 |
| - "'", // ASCII apostrophe |
48 |
| - "`", // ASCII backtick |
49 |
| - "\u2018", // opening single quote |
50 |
| - "\u2019", // closing single quote |
51 |
| - "\u2032", // mathematical prime mark |
52 |
| - nullptr, // end of our list. |
53 |
| -}; |
| 23 | +constexpr const char *kLRM = "\u200E"; // Left-to-Right Mark |
| 24 | +constexpr const char *kRLM = "\u200F"; // Right-to-Left Mark |
| 25 | +constexpr const char *kRLE = "\u202A"; // Right-to-Left Embedding (NOTE(review): U+202A is LEFT-TO-RIGHT EMBEDDING; RLE is U+202B - confirm intended code point) |
| 26 | +constexpr const char *kPDF = "\u202C"; // Pop Directional Formatting |
54 | 27 |
|
55 | 28 | } // namespace
|
0 commit comments