Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12309,13 +12309,20 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source,
SourceLocation CC) {
assert(Source->isUnicodeCharacterType() && Target->isUnicodeCharacterType() &&
Source != Target);

// Lone surrogates have a distinct representation in UTF-32.
// Converting betweem UTF-16 and UTF-32 codepoint seems very widespread,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Converting betweem UTF-16 and UTF-32 codepoint seems very widespread,
// Converting between UTF-16 and UTF-32 codepoints seems very widespread,

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to allow this as an opt-in warning so folks who want to catch those conversions still can?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did consider that but given feedback we probably want to backport this change so I kept it simple

// so don't warn on such conversion.
if (Source->isChar16Type() && Target->isChar32Type())
return;

Expr::EvalResult Result;
if (E->EvaluateAsInt(Result, S.getASTContext(), Expr::SE_AllowSideEffects,
S.isConstantEvaluatedContext())) {
llvm::APSInt Value(32);
Value = Result.Val.getInt();
bool IsASCII = Value <= 0x7F;
bool IsBMP = Value <= 0xD7FF || (Value >= 0xE000 && Value <= 0xFFFF);
bool IsBMP = Value <= 0xDFFF || (Value >= 0xE000 && Value <= 0xFFFF);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is not really explain in the summary or at least I don't get it. I guess this cover the char32 to char16 D800 test below.

bool ConversionPreservesSemantics =
IsASCII || (!Source->isChar8Type() && !Target->isChar8Type() && IsBMP);

Expand Down
8 changes: 4 additions & 4 deletions clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
c16(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' may lose precision and change the meaning of the represented code unit}}

c32(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' may change the meaning of the represented code unit}}
c32(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' may change the meaning of the represented code unit}}
c32(u16);
c32(u32);


Expand All @@ -30,7 +30,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
c16(char32_t(0x7f));
c16(char32_t(0x80));
c16(char32_t(0xD7FF));
c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}}
c16(char32_t(0xD800));
c16(char32_t(0xE000));
c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code point '🐉'}}

Expand All @@ -44,8 +44,8 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
c32(char16_t(0x80));

c32(char16_t(0xD7FF));
c32(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xD800>'}}
c32(char16_t(0xDFFF)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xDFFF>'}}
c32(char16_t(0xD800));
c32(char16_t(0xDFFF));
c32(char16_t(0xE000));
c32(char16_t(u'☕'));

Expand Down