diff --git a/include/fmt/chrono.h b/include/fmt/chrono.h index ad82dc41efdba..7cd007e49cd2e 100644 --- a/include/fmt/chrono.h +++ b/include/fmt/chrono.h @@ -482,8 +482,16 @@ inline size_t strftime(wchar_t* str, size_t count, const wchar_t* format, return wcsftime(str, count, format, time); } +template inline FMT_CONSTEXPR bool is_ascii_char(Char c) { + return static_cast(c) <= 127u; +} + +inline FMT_CONSTEXPR void set_bit(uint8_t& flags, uint8_t bit_number) { + flags |= 1 << bit_number; +} + // Writes two-digit numbers a, b and c separated by sep to buf. -// The method by Pavel Novikov based on +// Based on // https://johnnylee-sde.github.io/Fast-unsigned-integer-to-time-string/. inline void write_digit2_separated(char* buf, unsigned a, unsigned b, unsigned c, char sep) { @@ -508,22 +516,27 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b, memcpy(buf, &digits, 8); } +inline unsigned year_from_tm(std::tm tm) { return tm.tm_year + 1900; } +inline unsigned month_from_tm(std::tm tm) { return tm.tm_mon + 1; } + FMT_END_DETAIL_NAMESPACE template struct formatter, Char> : formatter { - FMT_CONSTEXPR formatter() { - this->specs = {default_specs, sizeof(default_specs) / sizeof(Char)}; - } - template FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { auto it = ctx.begin(); if (it != ctx.end() && *it == ':') ++it; auto end = it; while (end != ctx.end() && *end != '}') ++end; - if (end != it) this->specs = {it, detail::to_unsigned(end - it)}; + if (end == it) { + formatter::parse(basic_string_view{ + default_specs, sizeof(default_specs) / sizeof(Char)}); + } else { + formatter::parse( + basic_string_view{it, detail::to_unsigned(end - it)}); + } return end; } @@ -544,30 +557,218 @@ constexpr Char template struct formatter { private: - enum class spec { - unknown, - year_month_day, - hh_mm_ss, + enum class token_type : uint8_t { + empty, + single_char, + char_span, + year_decimals, + year_2decimals, + month_decimals, + day_of_year, +
+ day_of_month, + day_of_week, + iso_day_of_week, + hours, + minutes, + seconds, + month_day_year_2decimals, + iso_date, + hours_minutes, + iso_time, + offset_from_utc, + + invalid = 255 + }; + struct spec_token { + token_type type; + union { + Char single_char; + struct { + uint8_t begin, end; // offsets from 'specs.begin()' + } char_span; + }; + }; + static constexpr size_t allowed_token_count = 12; + spec_token spec_tokens[allowed_token_count] = {{token_type::invalid}}; + + enum validation : uint8_t { + year, + month, + day_of_year, // not yet supported + day_of_month, + day_of_week, + hours, + minutes, + seconds }; - spec spec_ = spec::unknown; + uint8_t needed_validation = 0; public: basic_string_view specs; template FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { - auto it = ctx.begin(); - if (it != ctx.end() && *it == ':') ++it; - auto end = it; + auto begin = ctx.begin(); + if (begin != ctx.end() && *begin == ':') ++begin; + auto end = begin; while (end != ctx.end() && *end != '}') ++end; - auto size = detail::to_unsigned(end - it); - specs = {it, size}; - // basic_string_view<>::compare isn't constexpr before C++17 - if (specs.size() == 2 && specs[0] == Char('%')) { - if (specs[1] == Char('F')) - spec_ = spec::year_month_day; - else if (specs[1] == Char('T')) - spec_ = spec::hh_mm_ss; + const auto size = detail::to_unsigned(end - begin); + specs = {begin, size}; + + if (begin == end) { + spec_tokens[0].type = token_type::empty; + return end; + } + // fast formatting is only supported for short format strings + if (size < 256) { + using detail::set_bit; + + spec_token tokens[allowed_token_count] = {}; + const auto tokens_end = std::end(tokens); + auto token = tokens; + auto it = begin; + while (it != end) { + if (*it == '%') { + if (++it == end) // format string seems to be invalid, + return end; // let 'strftime' deal with it + + switch (*it++) { + case '%': + token->type = token_type::single_char; + token->single_char = '%'; + break;
+ + case 'n': + token->type = token_type::single_char; + token->single_char = '\n'; + break; + + case 't': + token->type = token_type::single_char; + token->single_char = '\t'; + break; + + case 'Y': + token->type = token_type::year_decimals; + set_bit(needed_validation, validation::year); + break; + + case 'y': + token->type = token_type::year_2decimals; + set_bit(needed_validation, validation::year); + break; + + case 'm': + token->type = token_type::month_decimals; + set_bit(needed_validation, validation::month); + break; + + // case 'j': // TODO: support it later + // token->type = token_type::day_of_year; + // break; + + case 'd': + token->type = token_type::day_of_month; + set_bit(needed_validation, validation::day_of_month); + break; + + case 'w': + token->type = token_type::day_of_week; + set_bit(needed_validation, validation::day_of_week); + break; + + case 'u': + token->type = token_type::iso_day_of_week; + set_bit(needed_validation, validation::day_of_week); + break; + + case 'H': + token->type = token_type::hours; + set_bit(needed_validation, validation::hours); + break; + + case 'M': + token->type = token_type::minutes; + set_bit(needed_validation, validation::minutes); + break; + + case 'S': + token->type = token_type::seconds; + set_bit(needed_validation, validation::seconds); + break; + + case 'D': + token->type = token_type::month_day_year_2decimals; + set_bit(needed_validation, validation::year); + set_bit(needed_validation, validation::month); + set_bit(needed_validation, validation::day_of_month); + break; + + case 'F': + token->type = token_type::iso_date; + set_bit(needed_validation, validation::year); + set_bit(needed_validation, validation::month); + set_bit(needed_validation, validation::day_of_month); + break; + + case 'R': + token->type = token_type::hours_minutes; + set_bit(needed_validation, validation::hours); + set_bit(needed_validation, validation::minutes); + break; + + case 'T': + token->type = token_type::iso_time;
+ set_bit(needed_validation, validation::hours); + set_bit(needed_validation, validation::minutes); + set_bit(needed_validation, validation::seconds); + break; + + // case 'z': // TODO: support it later + // token->type = token_type::offset_from_utc; + // break; + + // 'C' should write first two decimals of the year, + // what should it write for 10000? nope + + // 'e' should write day of month in [1,31], the leading character + // requirement is too ambiguous (space? no character?), nope + + // 'I' and 'p' should write hours in 12-hour format and "a.m."/"p.m." + // or something, too fancy, nope + + // These are all too fancy: + // EY Oy Ey EC G g b h B Om U OU W OW V OV Od Oe a A Ow Ou OH OI OM + // OS c Ec x Ex X EX r Z + + default: // anything fancy is not supported + return end; + } + } else { // it's a char span + const auto span_begin = it; + do { + if (*it == '%') break; + // non-ASCII characters are too fancy + if (!detail::is_ascii_char(*it)) return end; + } while (++it != end); + if (it - span_begin == 1) { // span holds exactly one character + token->type = token_type::single_char; + token->single_char = *span_begin; + } else { + token->type = token_type::char_span; + token->char_span.begin = static_cast(span_begin - begin); + token->char_span.end = static_cast(it - begin); + } + } + ++token; + if (token != tokens_end) { + token->type = token_type::invalid; // set termination + } else if (it != end) { + return end; // we ran out of tokens and there are still more + } + } + for (size_t i = 0; i != allowed_token_count; ++i) + spec_tokens[i] = tokens[i]; } return end; } @@ -575,20 +776,124 @@ template struct formatter { template auto format(const std::tm& tm, FormatContext& ctx) const -> decltype(ctx.out()) { - auto year = 1900 + tm.tm_year; - if (spec_ == spec::year_month_day && year >= 0 && year < 10000) { - char buf[10]; - detail::copy2(buf, detail::digits2(detail::to_unsigned(year / 100))); - detail::write_digit2_separated(buf + 2, year % 100, - detail::to_unsigned(tm.tm_mon + 1), -
- detail::to_unsigned(tm.tm_mday), '-'); - return std::copy_n(buf, sizeof(buf), ctx.out()); - } else if (spec_ == spec::hh_mm_ss) { - char buf[8]; - detail::write_digit2_separated(buf, detail::to_unsigned(tm.tm_hour), - detail::to_unsigned(tm.tm_min), - detail::to_unsigned(tm.tm_sec), ':'); - return std::copy_n(buf, sizeof(buf), ctx.out()); + switch (spec_tokens[0].type) { + case token_type::empty: + return ctx.out(); + + default: { + using detail::to_unsigned; + // nominal value ranges for std::tm members: + // tm_sec [0, 60] (including leap second) + // tm_min [0, 59] + // tm_hour [0, 23] + // tm_mday [1, 31] + // tm_mon [0, 11] + // tm_year any? (years since 1900) + // tm_wday [0, 6] (days since Sunday) + // tm_yday [0, 365] (days since January 1) + // 'year' = tm_year + 1900, we support 'year' between +1000 and +9999 + const auto validation_result = + (uint8_t(to_unsigned(tm.tm_year + 900) < 9000) << validation::year) | + (uint8_t(to_unsigned(tm.tm_mon) < 12) << validation::month) | + (uint8_t(to_unsigned(tm.tm_mday - 1) < 31) + << validation::day_of_month) | + (uint8_t(to_unsigned(tm.tm_wday) < 7) << validation::day_of_week) | + (uint8_t(to_unsigned(tm.tm_hour) < 24) << validation::hours) | + (uint8_t(to_unsigned(tm.tm_min) < 60) << validation::minutes) | + (uint8_t(to_unsigned(tm.tm_sec) < 61) << validation::seconds); + if ((needed_validation & ~validation_result) != 0) + break; // let 'strftime' deal with weird values + + auto out = ctx.out(); + for (auto& token : spec_tokens) { + switch (token.type) { + case token_type::invalid: + return out; // terminating + + case token_type::single_char: + *out++ = token.single_char; + break; + + case token_type::char_span: + out = std::copy(specs.begin() + token.char_span.begin, + specs.begin() + token.char_span.end, out); + break; + + case token_type::year_decimals: { + const auto year = detail::year_from_tm(tm); + out = std::copy_n(detail::digits2(year / 100), 2, out); + out = std::copy_n(detail::digits2(year % 100), 2,
+ out); + } break; + + case token_type::year_2decimals: + out = std::copy_n(detail::digits2(detail::year_from_tm(tm) % 100), 2, + out); + break; + + case token_type::month_decimals: + out = std::copy_n(detail::digits2(detail::month_from_tm(tm)), 2, out); + break; + + case token_type::day_of_month: + out = std::copy_n(detail::digits2(tm.tm_mday), 2, out); + break; + + case token_type::day_of_week: + *out++ = ('0' + static_cast(tm.tm_wday)); + break; + + case token_type::iso_day_of_week: + *out++ = *("7123456" + tm.tm_wday); + break; + + case token_type::hours: + out = std::copy_n(detail::digits2(tm.tm_hour), 2, out); + break; + + case token_type::minutes: + out = std::copy_n(detail::digits2(tm.tm_min), 2, out); + break; + + case token_type::seconds: + out = std::copy_n(detail::digits2(tm.tm_sec), 2, out); + break; + + case token_type::month_day_year_2decimals: { + char buf[8]; + detail::write_digit2_separated(buf, detail::month_from_tm(tm), + tm.tm_mday, + detail::year_from_tm(tm) % 100, '/'); + out = std::copy(buf, buf + 8, out); + } break; + + case token_type::iso_date: { + const auto year = detail::year_from_tm(tm); + char buf[10]; + detail::copy2(buf, detail::digits2(year / 100)); + detail::write_digit2_separated( + buf + 2, year % 100, detail::month_from_tm(tm), tm.tm_mday, '-'); + out = std::copy(buf, buf + 10, out); + } break; + + case token_type::hours_minutes: + out = std::copy_n(detail::digits2(tm.tm_hour), 2, out); + *out++ = ':'; + out = std::copy_n(detail::digits2(tm.tm_min), 2, out); + break; + + case token_type::iso_time: { + char buf[8]; + detail::write_digit2_separated(buf, tm.tm_hour, tm.tm_min, tm.tm_sec, + ':'); + out = std::copy(buf, buf + 8, out); + } break; + } + } + return out; + } + + case token_type::invalid: + break; } basic_memory_buffer tm_format; tm_format.append(specs.begin(), specs.end());