Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change pattern representation of fractional seconds; remove obsolete FieldLength variants #5392

Merged
merged 4 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 0 additions & 67 deletions components/datetime/src/fields/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,8 @@ pub enum FieldLength {
/// [LDML documentation in UTS 35](https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns)
/// for more details.
Six,
/// A fixed size format for numeric-only fields that is at most 127 digits.
Fixed(u8),
/// FieldLength::One (numeric), but overridden with a different numbering system
NumericOverride(FieldNumericOverrides),
/// A time zone field with non-standard rules.
TimeZoneFallbackOverride(TimeZoneFallbackOverride),
}

/// First index used for numeric overrides in compact FieldLength representation
Expand All @@ -65,12 +61,6 @@ pub enum FieldLength {
const FIRST_NUMERIC_OVERRIDE: u8 = 17;
/// Last index used for numeric overrides
const LAST_NUMERIC_OVERRIDE: u8 = 31;
/// First index used for time zone fallback overrides
const FIRST_TIME_ZONE_FALLBACK_OVERRIDE: u8 = 32;
/// Last index used for time zone fallback overrides
const LAST_TIME_ZONE_FALLBACK_OVERRIDE: u8 = 40;
/// First index used for fixed size formats in compact FieldLength representation
const FIRST_FIXED: u8 = 128;

impl FieldLength {
#[inline]
Expand All @@ -85,10 +75,6 @@ impl FieldLength {
FieldLength::NumericOverride(o) => FIRST_NUMERIC_OVERRIDE
.saturating_add(*o as u8)
.min(LAST_NUMERIC_OVERRIDE),
FieldLength::TimeZoneFallbackOverride(o) => FIRST_TIME_ZONE_FALLBACK_OVERRIDE
.saturating_add(*o as u8)
.min(LAST_TIME_ZONE_FALLBACK_OVERRIDE),
FieldLength::Fixed(p) => FIRST_FIXED.saturating_add(*p), /* truncate to at most 127 digits to avoid overflow */
}
}

Expand All @@ -104,14 +90,6 @@ impl FieldLength {
idx if (FIRST_NUMERIC_OVERRIDE..=LAST_NUMERIC_OVERRIDE).contains(&idx) => {
Self::NumericOverride((idx - FIRST_NUMERIC_OVERRIDE).try_into()?)
}
idx if (FIRST_TIME_ZONE_FALLBACK_OVERRIDE..=LAST_TIME_ZONE_FALLBACK_OVERRIDE)
.contains(&idx) =>
{
Self::TimeZoneFallbackOverride(
(idx - FIRST_TIME_ZONE_FALLBACK_OVERRIDE).try_into()?,
)
}
idx if idx >= FIRST_FIXED => Self::Fixed(idx - FIRST_FIXED),
_ => return Err(LengthError::InvalidLength),
})
}
Expand All @@ -127,10 +105,6 @@ impl FieldLength {
FieldLength::Narrow => 5,
FieldLength::Six => 6,
FieldLength::NumericOverride(o) => FIRST_NUMERIC_OVERRIDE as usize + o as usize,
FieldLength::TimeZoneFallbackOverride(o) => {
FIRST_TIME_ZONE_FALLBACK_OVERRIDE as usize + o as usize
}
FieldLength::Fixed(p) => p as usize,
}
}

Expand Down Expand Up @@ -244,44 +218,3 @@ impl fmt::Display for FieldNumericOverrides {
self.as_str().fmt(f)
}
}

/// Time zone fallback overrides to support configurations not found
/// in the CLDR datetime field symbol table.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_datetime::fields),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum TimeZoneFallbackOverride {
/// The short form of this time zone field,
/// but fall back directly to GMT.
ShortOrGmt = 0,
}

impl TryFrom<u8> for TimeZoneFallbackOverride {
type Error = LengthError;
fn try_from(other: u8) -> Result<Self, LengthError> {
Ok(match other {
0 => Self::ShortOrGmt,
_ => return Err(LengthError::InvalidLength),
})
}
}

// impl TimeZoneFallbackOverride {
// /// Convert this to the corresponding string code
// pub fn as_str(self) -> &'static str {
// match self {
// Self::ShortOrGmt => "ShortOrGmt",
// }
// }
// }

// impl fmt::Display for TimeZoneFallbackOverride {
// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// self.as_str().fmt(f)
// }
// }
13 changes: 4 additions & 9 deletions components/datetime/src/fields/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod length;
pub(crate) mod symbols;

use displaydoc::Display;
pub use length::{FieldLength, FieldNumericOverrides, LengthError, TimeZoneFallbackOverride};
pub use length::{FieldLength, FieldNumericOverrides, LengthError};
pub use symbols::*;

use core::{
Expand Down Expand Up @@ -69,6 +69,7 @@ impl Field {
FieldSymbol::Minute => TextOrNumeric::Numeric,
FieldSymbol::Second(second) => second.get_length_type(self.length),
FieldSymbol::TimeZone(zone) => zone.get_length_type(self.length),
FieldSymbol::DecimalSecond(_) => TextOrNumeric::Numeric,
}
}
}
Expand All @@ -94,14 +95,8 @@ impl From<(FieldSymbol, FieldLength)> for Field {
impl TryFrom<(FieldSymbol, usize)> for Field {
type Error = Error;
fn try_from(input: (FieldSymbol, usize)) -> Result<Self, Self::Error> {
let length = if input.0 != FieldSymbol::Second(crate::fields::Second::FractionalSecond) {
FieldLength::from_idx(input.1 as u8).map_err(|_| Self::Error::InvalidLength(input.0))?
} else if input.1 <= 127 {
FieldLength::from_idx(128 + input.1 as u8)
.map_err(|_| Self::Error::InvalidLength(input.0))?
} else {
return Err(Self::Error::InvalidLength(input.0));
};
let length = FieldLength::from_idx(input.1 as u8)
.map_err(|_| Self::Error::InvalidLength(input.0))?;
Ok(Self {
symbol: input.0,
length,
Expand Down
132 changes: 109 additions & 23 deletions components/datetime/src/fields/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#[cfg(any(feature = "datagen", feature = "experimental"))]
use crate::fields::FieldLength;
#[cfg(any(feature = "datagen", feature = "experimental"))]
use crate::neo_skeleton::FractionalSecondDigits;
use core::{cmp::Ordering, convert::TryFrom};
use displaydoc::Display;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -55,10 +57,13 @@ pub enum FieldSymbol {
Hour(Hour),
/// Minute number within an hour.
Minute,
/// Seconds number within a minute, including fractional seconds, or milliseconds within a day.
/// Seconds integer within a minute or milliseconds within a day.
Second(Second),
/// Time zone as a name, a zone ID, or a ISO 8601 numerical offset.
TimeZone(TimeZone),
/// Seconds with fractional digits. If seconds are an integer,
/// [`FieldSymbol::Second`] is used.
DecimalSecond(DecimalSecond),
}

impl FieldSymbol {
Expand Down Expand Up @@ -110,6 +115,7 @@ impl FieldSymbol {
FieldSymbol::Minute => (8, 0),
FieldSymbol::Second(second) => (9, second.idx()),
FieldSymbol::TimeZone(tz) => (10, tz.idx()),
FieldSymbol::DecimalSecond(second) => (11, second.idx()),
};
let result = high << 4;
result | low
Expand All @@ -134,13 +140,14 @@ impl FieldSymbol {
8 if low == 0 => Self::Minute,
9 => Self::Second(Second::from_idx(low)?),
10 => Self::TimeZone(TimeZone::from_idx(low)?),
11 => Self::DecimalSecond(DecimalSecond::from_idx(low)?),
_ => return Err(SymbolError::InvalidIndex(idx)),
})
}

/// Returns the index associated with this FieldSymbol.
#[cfg(any(feature = "datagen", feature = "experimental"))] // only referenced in experimental code
fn discriminant_idx(&self) -> u8 {
fn idx_for_skeleton(&self) -> u8 {
match self {
FieldSymbol::Era => 0,
FieldSymbol::Year(_) => 1,
Expand All @@ -151,16 +158,37 @@ impl FieldSymbol {
FieldSymbol::DayPeriod(_) => 6,
FieldSymbol::Hour(_) => 7,
FieldSymbol::Minute => 8,
FieldSymbol::Second(_) => 9,
FieldSymbol::Second(_) | FieldSymbol::DecimalSecond(_) => 9,
FieldSymbol::TimeZone(_) => 10,
}
}

/// Compares this enum with other solely based on the enum variant,
/// ignoring the enum's data.
///
/// Second and DecimalSecond are considered equal.
#[cfg(any(feature = "datagen", feature = "experimental"))] // only referenced in experimental code
pub(crate) fn discriminant_cmp(&self, other: &Self) -> Ordering {
self.discriminant_idx().cmp(&other.discriminant_idx())
pub(crate) fn skeleton_cmp(&self, other: &Self) -> Ordering {
self.idx_for_skeleton().cmp(&other.idx_for_skeleton())
}

#[cfg(any(feature = "datagen", feature = "experimental"))]
pub(crate) fn from_fractional_second_digits(
fractional_second_digits: FractionalSecondDigits,
) -> Self {
use FractionalSecondDigits::*;
match fractional_second_digits {
F0 => FieldSymbol::Second(Second::Second),
F1 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF1),
F2 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF2),
F3 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF3),
F4 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF4),
F5 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF5),
F6 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF6),
F7 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF7),
F8 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF8),
F9 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF9),
}
}

/// UTS 35 defines several 1 and 2 symbols to be the same as 3 symbols (abbreviated).
Expand Down Expand Up @@ -273,15 +301,23 @@ impl FieldSymbol {
Self::Hour(Hour::H24) => 21,
Self::Minute => 22,
Self::Second(Second::Second) => 23,
Self::Second(Second::FractionalSecond) => 24,
Self::Second(Second::Millisecond) => 25,
Self::TimeZone(TimeZone::LowerZ) => 26,
Self::TimeZone(TimeZone::UpperZ) => 27,
Self::TimeZone(TimeZone::UpperO) => 28,
Self::TimeZone(TimeZone::LowerV) => 29,
Self::TimeZone(TimeZone::UpperV) => 30,
Self::TimeZone(TimeZone::LowerX) => 31,
Self::TimeZone(TimeZone::UpperX) => 32,
Self::Second(Second::Millisecond) => 24,
Self::DecimalSecond(DecimalSecond::SecondF1) => 31,
Self::DecimalSecond(DecimalSecond::SecondF2) => 32,
Self::DecimalSecond(DecimalSecond::SecondF3) => 33,
Self::DecimalSecond(DecimalSecond::SecondF4) => 34,
Self::DecimalSecond(DecimalSecond::SecondF5) => 35,
Self::DecimalSecond(DecimalSecond::SecondF6) => 36,
Self::DecimalSecond(DecimalSecond::SecondF7) => 37,
Self::DecimalSecond(DecimalSecond::SecondF8) => 38,
Self::DecimalSecond(DecimalSecond::SecondF9) => 39,
Self::TimeZone(TimeZone::LowerZ) => 100,
Self::TimeZone(TimeZone::UpperZ) => 101,
Self::TimeZone(TimeZone::UpperO) => 102,
Self::TimeZone(TimeZone::LowerV) => 103,
Self::TimeZone(TimeZone::UpperV) => 104,
Self::TimeZone(TimeZone::LowerX) => 105,
Self::TimeZone(TimeZone::UpperX) => 106,
}
}
}
Expand Down Expand Up @@ -314,6 +350,7 @@ impl TryFrom<char> for FieldSymbol {
})
.or_else(|_| Second::try_from(ch).map(Self::Second))
.or_else(|_| TimeZone::try_from(ch).map(Self::TimeZone))
// Note: char-to-enum conversion for DecimalSecond is handled directly in the parser
}
}

Expand All @@ -331,6 +368,8 @@ impl From<FieldSymbol> for char {
FieldSymbol::Minute => 'm',
FieldSymbol::Second(second) => second.into(),
FieldSymbol::TimeZone(time_zone) => time_zone.into(),
// Note: This is only used for representing the integer portion
FieldSymbol::DecimalSecond(_) => 's',
}
}
}
Expand Down Expand Up @@ -509,10 +548,6 @@ impl LengthType for Month {
FieldLength::Wide => TextOrNumeric::Text,
FieldLength::Narrow => TextOrNumeric::Text,
FieldLength::Six => TextOrNumeric::Text,
FieldLength::Fixed(_) | FieldLength::TimeZoneFallbackOverride(_) => {
debug_assert!(false, "Invalid field length for month");
TextOrNumeric::Text
}
}
}
}
Expand Down Expand Up @@ -553,19 +588,18 @@ field_type!(
HourULE
);

// NOTE: 'S' FractionalSecond is represented via DecimalSecond,
// so it is not included in the Second enum.

field_type!(
/// An enum for the possible symbols of a second field in a date pattern.
Second; {
/// Field symbol for second (numeric).
's' => Second = 0,
/// Field symbol for fractional second (numeric).
///
/// Produces the number of digits specified by the field length.
'S' => FractionalSecond = 1,
/// Field symbol for milliseconds in day (numeric).
///
/// This field behaves exactly like a composite of all time-related fields, not including the zone fields.
'A' => Millisecond = 2,
'A' => Millisecond = 1,
};
Numeric;
SecondULE
Expand Down Expand Up @@ -697,3 +731,55 @@ impl LengthType for TimeZone {
}
}
}

/// A second field with fractional digits.
#[derive(
Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy, yoke::Yokeable, zerofrom::ZeroFrom,
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_datetime::fields),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[allow(clippy::enum_variant_names)]
#[repr(u8)]
#[zerovec::make_ule(DecimalSecondULE)]
#[zerovec::derive(Debug)]
#[allow(clippy::exhaustive_enums)] // used in data struct
pub enum DecimalSecond {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question, n-b: what's the advantage of this over DecimalSecond(u8)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Can exhaustively match
  • More niches
  • Encodes the invariants into the type itself

/// A second with 1 fractional digit: "1.0"
SecondF1 = 1,
/// A second with 2 fractional digits: "1.00"
SecondF2 = 2,
/// A second with 3 fractional digits: "1.000"
SecondF3 = 3,
/// A second with 4 fractional digits: "1.0000"
SecondF4 = 4,
/// A second with 5 fractional digits: "1.00000"
SecondF5 = 5,
/// A second with 6 fractional digits: "1.000000"
SecondF6 = 6,
/// A second with 7 fractional digits: "1.0000000"
SecondF7 = 7,
/// A second with 8 fractional digits: "1.00000000"
SecondF8 = 8,
/// A second with 9 fractional digits: "1.000000000"
SecondF9 = 9,
}

impl DecimalSecond {
#[inline]
pub(crate) fn idx(self) -> u8 {
self as u8
}
#[inline]
pub(crate) fn from_idx(idx: u8) -> Result<Self, SymbolError> {
Self::new_from_u8(idx).ok_or(SymbolError::InvalidIndex(idx))
}
}
impl From<DecimalSecond> for FieldSymbol {
fn from(input: DecimalSecond) -> Self {
Self::DecimalSecond(input)
}
}
Loading