From 8c5c428622ee62dc9dc4243adee7df6c2ed385ca Mon Sep 17 00:00:00 2001
From: Roman Koshelev
Date: Sun, 12 Sep 2021 18:07:16 +0300
Subject: [PATCH 1/2] Add copy2() constexpr

---
 include/fmt/format.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/fmt/format.h b/include/fmt/format.h
index 5402c2808fb1..db0986a287a1 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -1049,11 +1049,19 @@ inline auto equal2(const char* lhs, const char* rhs) -> bool {
 }
 
 // Copies two characters from src to dst.
-template <typename Char> void copy2(Char* dst, const char* src) {
-  *dst++ = static_cast<Char>(*src++);
-  *dst = static_cast<Char>(*src);
+template <typename Char>
+FMT_CONSTEXPR20 FMT_INLINE void copy2(Char* dst, const char* src) {
+  if (!is_constant_evaluated() && std::is_same<Char, char>::value) {
+    memcpy(dst, src, 2);
+  } else {
+    // We read both bytes before writing so that the compiler can do it in
+    // one pair of read/write instructions (even if Char aliases char)
+    char dc0 = *src++;
+    char dc1 = *src;
+    *dst++ = static_cast<Char>(dc0);
+    *dst = static_cast<Char>(dc1);
+  }
 }
-FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); }
 
 template <typename Iterator> struct format_decimal_result {
   Iterator begin;

From 9052207625336cd8e09d8a4da5e68988a951d708 Mon Sep 17 00:00:00 2001
From: Roman Koshelev
Date: Sun, 12 Sep 2021 18:09:05 +0300
Subject: [PATCH 2/2] Removed redundant format_decimal implementation for
 constexpr context

---
 include/fmt/format.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/include/fmt/format.h b/include/fmt/format.h
index db0986a287a1..2621cf848003 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -1077,14 +1077,6 @@ FMT_CONSTEXPR20 auto format_decimal(Char* out, UInt value, int size)
   FMT_ASSERT(size >= count_digits(value), "invalid digit count");
   out += size;
   Char* end = out;
-  if (is_constant_evaluated()) {
-    while (value >= 10) {
-      *--out = static_cast<Char>('0' + value % 10);
-      value /= 10;
-    }
-    *--out = static_cast<Char>('0' + value);
-    return {out, end};
-  }
   while (value >= 100) {
     // Integer division is slow so do it for a group of two digits instead
     // of for every digit. The idea comes from the talk by Alexandrescu