Skip to content

Commit 334d9b4

Browse files
committed
unicodes: Optimize code by using constexpr and removing unused globals
Signed-off-by: Stefan Weil <[email protected]>
1 parent 23d05a5 commit 334d9b4

File tree

2 files changed

+8
-43
lines changed

2 files changed

+8
-43
lines changed

src/ccutil/unicodes.cpp

+4-31
Original file line number | Diff line number | Diff line change
@@ -20,36 +20,9 @@
2020

2121
namespace tesseract {
2222

23-
const char *kUTF8LineSeparator = "\u2028"; // "\xe2\x80\xa8";
24-
const char *kUTF8ParagraphSeparator = "\u2029"; // "\xe2\x80\xa9";
25-
const char *kLRM = "\u200E"; // Left-to-Right Mark
26-
const char *kRLM = "\u200F"; // Right-to-Left Mark
27-
const char *kRLE = "\u202A"; // Right-to-Left Embedding
28-
const char *kPDF = "\u202C"; // Pop Directional Formatting
29-
30-
const char *kHyphenLikeUTF8[] = {
31-
"-", // ASCII hyphen-minus
32-
"\u05BE", // word hyphen in Hebrew
33-
"\u2010", // hyphen
34-
"\u2011", // non-breaking hyphen
35-
"\u2012", // a hyphen the same width as digits
36-
"\u2013", // en dash
37-
"\u2014", // em dash
38-
"\u2015", // horizontal bar
39-
"\u2212", // arithmetic minus sign
40-
"\uFE58", // small em dash
41-
"\uFE63", // small hyphen-minus
42-
"\uFF0D", // fullwidth hyphen-minus
43-
nullptr, // end of our list
44-
};
45-
46-
const char *kApostropheLikeUTF8[] = {
47-
"'", // ASCII apostrophe
48-
"`", // ASCII backtick
49-
"\u2018", // opening single quote
50-
"\u2019", // closing single quote
51-
"\u2032", // mathematical prime mark
52-
nullptr, // end of our list.
53-
};
23+
constexpr const char *kLRM = "\u200E"; // Left-to-Right Mark
24+
constexpr const char *kRLM = "\u200F"; // Right-to-Left Mark
25+
constexpr const char *kRLE = "\u202A"; // Right-to-Left Embedding
26+
constexpr const char *kPDF = "\u202C"; // Pop Directional Formatting
5427

5528
} // namespace

src/ccutil/unicodes.h

+4-12
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
* File: unicodes.h
33
* Description: Unicode related machinery
44
* Author: David Eger
5-
* Created: Wed Jun 15 16:37:50 PST 2011
65
*
76
* (C) Copyright 2011, Google, Inc.
87
** Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,17 +21,10 @@
2221

2322
namespace tesseract {
2423

25-
extern const char *kUTF8LineSeparator;
26-
extern const char *kUTF8ParagraphSeparator;
27-
extern const char *kLRM; ///< Left-to-Right Mark
28-
extern const char *kRLM; ///< Right-to-Left Mark
29-
extern const char *kRLE; ///< Right-to-Left Embedding
30-
extern const char *kPDF; ///< Pop Directional Formatting
31-
32-
/// The following are confusable internal word punctuation symbols
33-
/// which we normalize to the first variant when matching in dawgs.
34-
extern const char *kHyphenLikeUTF8[];
35-
extern const char *kApostropheLikeUTF8[];
24+
extern const char* const kLRM; ///< Left-to-Right Mark
25+
extern const char* const kRLM; ///< Right-to-Left Mark
26+
extern const char* const kRLE; ///< Right-to-Left Embedding
27+
extern const char* const kPDF; ///< Pop Directional Formatting
3628

3729
} // namespace
3830

0 commit comments

Comments
 (0)