Skip to content

Commit

Permalink
Add _verified_utf32_char to ustring.cpp, to reduce duplicate logic.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivorforce committed Jan 2, 2025
1 parent 2582793 commit 5b9dde4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 45 deletions.
67 changes: 24 additions & 43 deletions core/string/ustring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,25 @@ void String::parse_utf32(const StrRange<char32_t> &p_cstr) {
copy_from_unchecked(p_cstr.c_str, p_cstr.len);
}

// Validates a single UTF-32 code unit before it is stored in a String.
//
// A value is rejected when it lies in the surrogate block (0xD800-0xDFFF,
// detected with the 0xfffff800 mask) or when it is greater than 0x10ffff.
// A rejected value is reported through String::print_unicode_error() using
// that function's default (non-critical) severity, `*failure` is set to true
// when a non-null pointer was supplied, and String::_replacement_char is
// returned in its place. A valid value is returned unchanged.
//
// `failure` is only ever written with `true`; it is never reset to `false`
// here, so callers can accumulate the flag across several calls.
constexpr char32_t _verified_utf32_char(char32_t p_char, bool* failure = nullptr) {
	const bool is_surrogate = (p_char & 0xfffff800) == 0xd800;

	// Fast path: nothing to report, hand the character back untouched.
	// (Surrogates are always <= 0x10ffff, so the two rejection cases are disjoint.)
	if (!is_surrogate && p_char <= 0x10ffff) {
		return p_char;
	}

	if (is_surrogate) {
		String::print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
	} else {
		String::print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
	}

	if (failure != nullptr) {
		*failure = true;
	}
	return String::_replacement_char;
}

void String::parse_utf32(const char32_t &p_char) {
if (p_char == 0) {
print_unicode_error("NUL character", true);
Expand All @@ -336,17 +355,7 @@ void String::parse_utf32(const char32_t &p_char) {
resize(2);

char32_t *dst = ptrw();

if ((p_char & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
dst[0] = _replacement_char;
} else if (p_char > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
dst[0] = _replacement_char;
} else {
dst[0] = p_char;
}

dst[0] = _verified_utf32_char(p_char);
dst[1] = 0;
}

Expand All @@ -361,18 +370,7 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
char32_t *dst = ptrw();

for (; p_char < end; ++p_char, ++dst) {
const char32_t chr = *p_char;
if ((chr & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr));
*dst = _replacement_char;
continue;
}
if (chr > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr));
*dst = _replacement_char;
continue;
}
*dst = chr;
*dst = _verified_utf32_char(*p_char);
}
*dst = 0;
}
Expand Down Expand Up @@ -490,16 +488,7 @@ String &String::operator+=(char32_t p_char) {
resize(lhs_len + 2);
char32_t *dst = ptrw();

if ((p_char & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
dst[lhs_len] = _replacement_char;
} else if (p_char > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
dst[lhs_len] = _replacement_char;
} else {
dst[lhs_len] = p_char;
}

dst[lhs_len] = _verified_utf32_char(p_char);
dst[lhs_len + 1] = 0;

return *this;
Expand Down Expand Up @@ -1923,7 +1912,7 @@ Vector<uint8_t> String::hex_decode() const {
#undef HEX_TO_BYTE
}

void String::print_unicode_error(const String &p_message, bool p_critical) const {
void String::print_unicode_error(const String &p_message, bool p_critical) {
if (p_critical) {
print_error(vformat(U"Unicode parsing error, some characters were replaced with � (U+FFFD): %s", p_message));
} else {
Expand Down Expand Up @@ -2111,16 +2100,8 @@ Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
print_unicode_error("NUL character", true);
decode_failed = true;
unichar = _replacement_char;
} else if ((unichar & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", unichar), true);
decode_failed = true;
unichar = _replacement_char;
} else if (unichar > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar), true);
decode_failed = true;
unichar = _replacement_char;
}
*(dst++) = unichar;
*(dst++) = _verified_utf32_char(unichar, &decode_failed);
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions core/string/ustring.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ class CharString {
class String {
CowData<char32_t> _cowdata;
static const char32_t _null;
static const char32_t _replacement_char;

// Known-length copy.
void parse_latin1(const StrRange<char> &p_cstr);
Expand Down Expand Up @@ -321,6 +320,8 @@ class String {
String _camelcase_to_underscore() const;

public:
static const char32_t _replacement_char;

enum {
npos = -1 ///<for "some" compatibility with std::string (npos is a huge value in std::string)
};
Expand Down Expand Up @@ -396,7 +397,7 @@ class String {
bool is_valid_string() const;

/* debug, error messages */
void print_unicode_error(const String &p_message, bool p_critical = false) const;
static void print_unicode_error(const String &p_message, bool p_critical = false);

/* complex helpers */
String substr(int p_from, int p_chars = -1) const;
Expand Down

0 comments on commit 5b9dde4

Please sign in to comment.