diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 72b36f55f53..18865a44a44 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -660,6 +660,8 @@ jobs: # icu_benchmark_memory cli. examples: - >- + icu_calendar/iso_date_manipulations + icu_calendar/iso_datetime_manipulations icu_datetime/work_log icu_locid/syntatically_canonicalize_locales icu_locid/filter_langids diff --git a/Cargo.lock b/Cargo.lock index 3a2c88b3bcc..d70d020baa3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,6 +1305,7 @@ dependencies = [ "criterion", "displaydoc", "icu", + "icu_benchmark_macros", "icu_calendar", "icu_locid", "icu_provider", @@ -1680,7 +1681,6 @@ dependencies = [ name = "icu_provider_blob" version = "0.6.0" dependencies = [ - "erased-serde", "icu_locid", "icu_provider", "litemap", @@ -1700,7 +1700,6 @@ dependencies = [ "criterion", "crlify", "displaydoc", - "erased-serde", "icu_benchmark_macros", "icu_locid", "icu_provider", diff --git a/components/calendar/Cargo.toml b/components/calendar/Cargo.toml index 78918cc6f1d..82af7a19c44 100644 --- a/components/calendar/Cargo.toml +++ b/components/calendar/Cargo.toml @@ -25,6 +25,7 @@ include = [ [features] std = [] +bench = [] serde = ["dep:serde", "zerovec/serde", "tinystr/serde", "icu_provider/serde"] datagen = ["serde", "zerovec/serde_serialize"] @@ -47,6 +48,7 @@ zerovec = { version = "0.7", path = "../../utils/zerovec", default-features = fa [dev-dependencies] criterion = "0.3" icu = { path = "../icu", default-features = false } +icu_benchmark_macros = { version = "0.6", path = "../../tools/benchmark/macros" } icu_calendar = { version = "0.6", path = "../calendar", features = ["serde"] } # Dependency required to prevent `delayed_good_path_bugs` error (https://github.com/unicode-org/icu4x/pull/1844#issuecomment-1118111564) icu_testdata = { version = "0.6", path = "../../provider/testdata" } serde = { version = "1.0", features = ["derive"] } @@ -59,3 +61,9 @@ harness = false [[bench]] name = "datetime" harness = false + +[[example]] +name = "iso_date_manipulations" + +[[example]] +name = "iso_datetime_manipulations" diff --git a/components/calendar/examples/iso_date_manipulations.rs b/components/calendar/examples/iso_date_manipulations.rs new file mode 100644 index 00000000000..f475450392d --- /dev/null +++ b/components/calendar/examples/iso_date_manipulations.rs @@ -0,0 +1,62 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// An example application which uses icu_datetime to format entries +// from a log into human readable dates and times. 
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use icu_calendar::{Calendar, Date, DateTimeError, Iso};
+
+const DATES_ISO: &[(i32, u8, u8)] = &[
+    (1970, 1, 1),
+    (1982, 3, 11),
+    (1999, 2, 21),
+    (2000, 12, 29),
+    (2001, 9, 8),
+    (2017, 7, 12),
+    (2020, 2, 29),
+    (2021, 3, 21),
+    (2021, 6, 10),
+    (2021, 9, 2),
+    (2022, 10, 8),
+    (2022, 2, 9),
+    (2033, 6, 10),
+];
+
+fn print(_date_input: &Date<Iso>) {
+    #[cfg(debug_assertions)]
+    {
+        let formatted_date = format!(
+            "Year: {}, Month: {}, Day: {}",
+            _date_input.year().number,
+            _date_input.month().number,
+            _date_input.day_of_month().0,
+        );
+
+        println!("{}", formatted_date);
+    }
+}
+
+fn tuple_to_iso_date(date: (i32, u8, u8)) -> Result<Date<Iso>, DateTimeError> {
+    Date::new_iso_date_from_integers(date.0, date.1, date.2)
+}
+
+#[no_mangle]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+    icu_benchmark_macros::main_setup!();
+
+    let dates = DATES_ISO
+        .iter()
+        .copied()
+        .map(tuple_to_iso_date)
+        .collect::<Result<Vec<Date<Iso>>, _>>()
+        .expect("Failed to parse dates.");
+
+    dates.iter().map(print).for_each(drop);
+
+    0
+}
diff --git a/components/calendar/examples/iso_datetime_manipulations.rs b/components/calendar/examples/iso_datetime_manipulations.rs
new file mode 100644
index 00000000000..61d16c91649
--- /dev/null
+++ b/components/calendar/examples/iso_datetime_manipulations.rs
@@ -0,0 +1,65 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// An example application which uses icu_calendar to construct ISO datetimes
+// and print their component fields.
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use icu_calendar::{Calendar, DateTime, DateTimeError, Iso};
+
+const DATETIMES_ISO: &[(i32, u8, u8, u8, u8, u8)] = &[
+    (1970, 1, 1, 3, 5, 12),
+    (1982, 3, 11, 2, 25, 59),
+    (1999, 2, 21, 13, 12, 23),
+    (2000, 12, 29, 10, 50, 23),
+    (2001, 9, 8, 11, 5, 5),
+    (2017, 7, 12, 3, 1, 1),
+    (2020, 2, 29, 23, 12, 23),
+    (2021, 3, 21, 18, 35, 34),
+    (2021, 6, 10, 13, 12, 23),
+    (2021, 9, 2, 5, 50, 22),
+    (2022, 10, 8, 9, 45, 32),
+    (2022, 2, 9, 10, 32, 45),
+    (2033, 6, 10, 17, 22, 22),
+];
+
+fn print(_datetime_input: &DateTime<Iso>) {
+    #[cfg(debug_assertions)]
+    {
+        let formatted_datetime = format!(
+            "Year: {}, Month: {}, Day: {}, Hour: {}, Minute: {}, Second: {}",
+            _datetime_input.date.year().number,
+            _datetime_input.date.month().number,
+            _datetime_input.date.day_of_month().0,
+            u8::from(_datetime_input.time.hour),
+            u8::from(_datetime_input.time.minute),
+            u8::from(_datetime_input.time.second),
+        );
+
+        println!("{}", formatted_datetime);
+    }
+}
+
+fn tuple_to_iso_datetime(date: (i32, u8, u8, u8, u8, u8)) -> Result<DateTime<Iso>, DateTimeError> {
+    DateTime::new_iso_datetime_from_integers(date.0, date.1, date.2, date.3, date.4, date.5)
+}
+
+#[no_mangle]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+    icu_benchmark_macros::main_setup!();
+
+    let datetimes = DATETIMES_ISO
+        .iter()
+        .copied()
+        .map(tuple_to_iso_datetime)
+        .collect::<Result<Vec<DateTime<Iso>>, _>>()
+        .expect("Failed to parse datetimes.");
+
+    datetimes.iter().map(print).for_each(drop);
+
+    0
+}
diff --git a/provider/blob/Cargo.toml b/provider/blob/Cargo.toml
index 5f32b029359..3c91809dd04 100644
--- a/provider/blob/Cargo.toml
+++ b/provider/blob/Cargo.toml
@@ -37,7 +37,6 @@ zerovec = { version = "0.7", path = "../../utils/zerovec",
features = ["serde", # For the export feature log = { version = "0.4", optional = true } litemap = { version = "0.4", path = "../../utils/litemap/", optional = true } -erased-serde = { version = "0.3", default-features = false, features = ["alloc"], optional = true } [dev-dependencies] icu_locid = { version = "0.6", path = "../../components/locid", features = ["serde"] } @@ -54,6 +53,5 @@ export = [ "litemap", "icu_provider/datagen", "zerovec/serde_serialize", - "erased-serde", ] std = ["icu_provider/std"] diff --git a/provider/blob/src/export/blob_exporter.rs b/provider/blob/src/export/blob_exporter.rs index dca25b3013a..abba34f70c2 100644 --- a/provider/blob/src/export/blob_exporter.rs +++ b/provider/blob/src/export/blob_exporter.rs @@ -40,7 +40,7 @@ impl DataExporter for BlobExporter<'_> { let mut serializer = postcard::Serializer { output: postcard::flavors::AllocVec(Vec::new()), }; - payload.serialize(&mut ::erase(&mut serializer))?; + payload.serialize(&mut serializer)?; self.resources.lock().unwrap().push(( key.get_hash(), options.write_to_string().into_owned().into_bytes(), diff --git a/provider/core/Cargo.toml b/provider/core/Cargo.toml index 1eae83d0a24..f0c63df3616 100644 --- a/provider/core/Cargo.toml +++ b/provider/core/Cargo.toml @@ -50,7 +50,7 @@ deserialize_bincode_1 = ["serde", "bincode", "std"] deserialize_postcard_07 = ["serde", "postcard"] # Dependencies for running data generation -datagen = ["dhat", "serde", "erased-serde", "crabbake", "std"] +datagen = ["dhat", "serde", "erased-serde", "crabbake", "std", "serde_json"] [dependencies] icu_locid = { version = "0.6", path = "../../components/locid" } diff --git a/provider/core/src/buf.rs b/provider/core/src/buf.rs index 1244176b2fa..dc76c33c59a 100644 --- a/provider/core/src/buf.rs +++ b/provider/core/src/buf.rs @@ -67,3 +67,23 @@ pub enum BufferFormat { /// Serialize using Postcard version 0.7. Postcard07, } + +impl BufferFormat { + /// Returns an error if the buffer format is not enabled. + pub fn check_available(&self) -> Result<(), DataError> { + match self { + #[cfg(feature = "deserialize_json")] + BufferFormat::Json => Ok(()), + + #[cfg(feature = "deserialize_bincode_1")] + BufferFormat::Bincode1 => Ok(()), + + #[cfg(feature = "deserialize_postcard_07")] + BufferFormat::Postcard07 => Ok(()), + + // Allowed for cases in which all features are enabled + #[allow(unreachable_patterns)] + _ => Err(DataErrorKind::UnavailableBufferFormat(*self).into_error()), + } + } +} diff --git a/provider/core/src/error.rs b/provider/core/src/error.rs index fb9772b3d47..404586e481d 100644 --- a/provider/core/src/error.rs +++ b/provider/core/src/error.rs @@ -2,6 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use crate::buf::BufferFormat; use crate::prelude::*; use displaydoc::Display; @@ -77,6 +78,11 @@ pub enum DataErrorKind { #[displaydoc("Missing source data")] #[cfg(feature = "datagen")] MissingSourceData, + + /// An error indicating that the desired buffer format is not available. This usually + /// means that a required feature was not enabled + #[displaydoc("Unavailable buffer format: {0:?} (does icu_provider need to be compiled with an additional feature?)")] + UnavailableBufferFormat(BufferFormat), } /// The error type for ICU4X data provider operations. @@ -224,25 +230,12 @@ impl DataError { /// it will print out the context. 
#[cfg(feature = "std")] #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] - pub fn with_path + ?Sized>(self, path: &P) -> Self { + pub fn with_path_context + ?Sized>(self, path: &P) -> Self { #[cfg(feature = "log_error_context")] log::warn!("{} (path: {:?})", self, path.as_ref()); self } - /// Logs the data error with the given context, then return self. - /// - /// This does not modify the error, but if the "log_error_context" feature is enabled, - /// it will print out the context. - #[cfg(feature = "std")] - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] - #[inline] - pub fn with_error_context(self, err: &E) -> Self { - #[cfg(feature = "log_error_context")] - log::warn!("{}: {}", self, err); - self - } - /// Logs the data error with the given context, then return self. /// /// This does not modify the error, but if the "log_error_context" feature is enabled, @@ -280,26 +273,6 @@ impl DataError { #[cfg(feature = "std")] impl std::error::Error for DataError {} -#[cfg(feature = "serde")] -impl From for DataError { - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] - fn from(e: crate::serde::Error) -> Self { - #[cfg(feature = "log_error_context")] - log::warn!("Serde error: {}", e); - DataError::custom("Serde error") - } -} - -#[cfg(feature = "postcard")] -impl From for DataError { - #[cfg_attr(not(feature = "log_error_context"), allow(unused_variables))] - fn from(e: postcard::Error) -> Self { - #[cfg(feature = "log_error_context")] - log::warn!("Postcard error: {}", e); - DataError::custom("Postcard error") - } -} - #[cfg(feature = "std")] impl From for DataError { fn from(e: std::io::Error) -> Self { diff --git a/provider/core/src/serde/de.rs b/provider/core/src/serde/de.rs index c0a12ec3ccb..f3d651c2a20 100644 --- a/provider/core/src/serde/de.rs +++ b/provider/core/src/serde/de.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use super::Error; use crate::buf::BufferFormat; use crate::buf::BufferProvider; use crate::prelude::*; @@ -11,24 +10,6 @@ use serde::de::Deserialize; use yoke::trait_hack::YokeTraitHack; use yoke::Yokeable; -/// Returns an error if the buffer format is not enabled. -pub fn check_format_supported(buffer_format: BufferFormat) -> Result<(), Error> { - match buffer_format { - #[cfg(feature = "deserialize_json")] - BufferFormat::Json => Ok(()), - - #[cfg(feature = "deserialize_bincode_1")] - BufferFormat::Bincode1 => Ok(()), - - #[cfg(feature = "deserialize_postcard_07")] - BufferFormat::Postcard07 => Ok(()), - - // Allowed for cases in which all features are enabled - #[allow(unreachable_patterns)] - _ => Err(Error::UnavailableFormat(buffer_format)), - } -} - /// A [`BufferProvider`] that deserializes its data using Serde. 
pub struct DeserializingBufferProvider<'a, P: ?Sized>(&'a P); @@ -52,7 +33,7 @@ fn deserialize_impl<'data, M>( #[allow(unused_variables)] bytes: &'data [u8], buffer_format: BufferFormat, _: PhantomData<&'data ()>, -) -> Result<>::Output, Error> +) -> Result<>::Output, DataError> where M: DataMarker, // Actual bound: @@ -88,12 +69,15 @@ where // Allowed for cases in which all features are enabled #[allow(unreachable_patterns)] - _ => Err(Error::UnavailableFormat(buffer_format)), + _ => Err(DataErrorKind::UnavailableBufferFormat(buffer_format).into_error()), } } impl DataPayload { - pub fn into_deserialized(self, buffer_format: BufferFormat) -> Result, Error> + pub fn into_deserialized( + self, + buffer_format: BufferFormat, + ) -> Result, DataError> where M: DataMarker, // Actual bound: @@ -134,23 +118,18 @@ where key: ResourceKey, req: &DataRequest, ) -> Result, DataError> { - let old_response = BufferProvider::load_buffer(self.0, key, req)?; - if let Some(old_payload) = old_response.payload { - let buffer_format = old_response - .metadata - .buffer_format - .ok_or(Error::FormatNotSpecified)?; - let new_payload = old_payload.into_deserialized(buffer_format)?; - Ok(DataResponse { - metadata: old_response.metadata, - payload: Some(new_payload), - }) - } else { - Ok(DataResponse { - metadata: old_response.metadata, - payload: None, - }) - } + let buffer_response = BufferProvider::load_buffer(self.0, key, req)?; + let buffer_format = buffer_response + .metadata + .buffer_format + .ok_or_else(|| DataError::custom("BufferProvider didn't set BufferFormat"))?; + Ok(DataResponse { + metadata: buffer_response.metadata, + payload: buffer_response + .payload + .map(|p| p.into_deserialized(buffer_format)) + .transpose()?, + }) } } diff --git a/provider/core/src/serde/mod.rs b/provider/core/src/serde/mod.rs index 848ec34b535..3966cca4f88 100644 --- a/provider/core/src/serde/mod.rs +++ b/provider/core/src/serde/mod.rs @@ -27,7 +27,6 @@ mod de; #[cfg(feature = "datagen")] mod ser; -pub use de::check_format_supported; pub use de::AsDeserializingBufferProvider; pub use de::DeserializingBufferProvider; @@ -36,68 +35,23 @@ pub use ser::SerializeBox; #[cfg(feature = "datagen")] pub use ser::SerializeMarker; -use crate::buf::BufferFormat; - -/// Error type for deserialization. -#[derive(displaydoc::Display, Debug)] -#[non_exhaustive] -pub enum Error { - /// An error originating in [`serde_json`]. - #[cfg(feature = "deserialize_json")] - #[displaydoc("{0}")] - Json(serde_json::error::Error), - - /// An error originating in [`bincode`]. - #[cfg(feature = "deserialize_bincode_1")] - #[displaydoc("{0}")] - Bincode1(bincode::Error), - - /// An error originating in [`postcard`]. - #[cfg(feature = "deserialize_postcard_07")] - #[displaydoc("{0}")] - Postcard07(postcard::Error), - - /// An error indicating that the desired buffer format is not available. This usually - /// means that a required feature was not enabled - #[allow(dead_code)] - #[displaydoc("Unavailable buffer format: {0:?} (does icu4x need to be compiled with an additional feature?)")] - UnavailableFormat(BufferFormat), - - /// An error originating in [`erased_serde`]. - #[displaydoc("{0}")] - #[cfg(feature = "datagen")] - Serde(erased_serde::Error), - - /// An error indicating that the buffer format could not be deduced. This is usually - /// unexpected and could indicate a problem with the data pipeline setup. 
- #[displaydoc("Buffer format not specified")] - FormatNotSpecified, -} - -#[cfg(feature = "deserialize_json")] -impl From for Error { +#[cfg(feature = "serde_json")] +impl From for crate::DataError { fn from(e: serde_json::error::Error) -> Self { - Error::Json(e) + crate::DataError::custom("JSON deserialize").with_display_context(&e) } } -#[cfg(feature = "deserialize_bincode_1")] -impl From for Error { +#[cfg(feature = "bincode")] +impl From for crate::DataError { fn from(e: bincode::Error) -> Self { - Error::Bincode1(e) + crate::DataError::custom("Bincode deserialize").with_display_context(&e) } } -#[cfg(feature = "deserialize_postcard_07")] -impl From for Error { +#[cfg(feature = "postcard")] +impl From for crate::DataError { fn from(e: postcard::Error) -> Self { - Error::Postcard07(e) - } -} - -#[cfg(feature = "datagen")] -impl From for Error { - fn from(e: erased_serde::Error) -> Self { - Error::Serde(e) + crate::DataError::custom("Postcard deserialize").with_display_context(&e) } } diff --git a/provider/core/src/serde/ser.rs b/provider/core/src/serde/ser.rs index c9ddbf6ced6..b96ef83766d 100644 --- a/provider/core/src/serde/ser.rs +++ b/provider/core/src/serde/ser.rs @@ -2,24 +2,15 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use super::Error; use crate::dynutil::UpcastDataPayload; use crate::prelude::*; use crate::yoke::*; use alloc::boxed::Box; -use core::ops::Deref; /// A wrapper around `Box` for integration with DataProvider. #[derive(yoke::Yokeable)] pub struct SerializeBox(Box); -impl Deref for SerializeBox { - type Target = dyn erased_serde::Serialize; - fn deref(&self) -> &Self::Target { - self.0.deref() - } -} - impl UpcastDataPayload for SerializeMarker where M: DataMarker, @@ -60,17 +51,19 @@ impl DataPayload { /// let mut buffer: Vec = vec![]; /// payload /// .into_serializable() - /// .serialize(&mut ::erase( - /// &mut serde_json::Serializer::new(&mut buffer), - /// )) + /// .serialize(&mut serde_json::Serializer::new(&mut buffer)) /// .expect("Serialization should succeed"); /// assert_eq!("{\"message\":\"(und) Hello World\"}".as_bytes(), buffer); /// ``` - pub fn serialize( - &self, - mut serializer: &mut dyn erased_serde::Serializer, - ) -> Result<(), Error> { - self.get().erased_serialize(&mut serializer)?; + pub fn serialize(&self, serializer: S) -> Result<(), DataError> + where + S: serde::Serializer, + S::Ok: 'static, // erased_serde requirement, cannot return values in `Ok` + { + self.get() + .0 + .erased_serialize(&mut ::erase(serializer)) + .map_err(|e| DataError::custom("Serde export").with_display_context(&e))?; Ok(()) } } diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 49b515f9c5d..3f4afd69d39 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -48,7 +48,7 @@ icu_collator = { version = "0.6", path = "../../experimental/collator", features icu_normalizer = { version = "0.6", path = "../../experimental/normalizer", features = ["datagen"], optional = true } # ICU provider infrastructure -icu_provider = { version = "0.6", path = "../core", features = ["std", "log_error_context"]} +icu_provider = { version = "0.6", path = "../core", features = ["std", "log_error_context", "datagen"]} icu_provider_adapters = { version = "0.6", path = "../adapters", features = ["datagen"] } icu_provider_blob = { version = "0.6", path = "../blob", features = ["export"] } icu_provider_fs = { version = "0.6", path = "../fs", features = 
["export"] } diff --git a/provider/datagen/src/bin/datagen.rs b/provider/datagen/src/bin/datagen.rs index 0ad01187b7a..4573f8d1d24 100644 --- a/provider/datagen/src/bin/datagen.rs +++ b/provider/datagen/src/bin/datagen.rs @@ -335,16 +335,14 @@ fn main() -> eyre::Result<()> { matches.is_present("IGNORE_MISSING_DATA"), ) .map_err(|e| -> eyre::ErrReport { - if icu_datagen::is_missing_cldr_error(e) { - eyre::eyre!( + match e { + icu_datagen::MISSING_CLDR_ERROR => eyre::eyre!( "Either --cldr-tag or --cldr-root or --input-from-testdata must be specified" - ) - } else if icu_datagen::is_missing_uprops_error(e) { - eyre::eyre!( + ), + icu_datagen::MISSING_UPROPS_ERROR => eyre::eyre!( "Either --uprops-tag or --uprops-root or --input-from-testdata must be specified" - ) - } else { - e.into() + ), + e => e.into(), } }) } diff --git a/provider/datagen/src/error.rs b/provider/datagen/src/error.rs index 4371094c273..b5685a72583 100644 --- a/provider/datagen/src/error.rs +++ b/provider/datagen/src/error.rs @@ -2,131 +2,25 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use displaydoc::Display; -use icu_locid::LanguageIdentifier; use icu_provider::{DataError, DataErrorKind}; -use std::path::{Path, PathBuf}; - -#[non_exhaustive] -#[derive(Display, Debug)] -pub enum DatagenError { - #[displaydoc("{0}: {1:?}")] - Io(std::io::Error, Option), - #[displaydoc("JSON error: {0}: {1:?}")] - Json(serde_json::error::Error, Option), - #[displaydoc("TOML error: {0}: {1:?}")] - Toml(toml::de::Error, Option), - #[displaydoc("{0}: {1:?}")] - Custom(String, Option), - #[displaydoc("Missing CLDR data")] - MissingCldrPaths, - #[displaydoc("Missing Unicode properties data")] - MissingUpropsPath, - #[displaydoc("Missing collation data")] - MissingCollPath, -} - -impl std::error::Error for DatagenError {} - -/// To help with debugging, I/O errors should be paired with a file path. -/// If a path is unavailable, create the error directly: [`DatagenError::Io`]`(err, `[`None`]`)` -impl> From<(std::io::Error, P)> for DatagenError { - fn from(pieces: (std::io::Error, P)) -> Self { - Self::Io(pieces.0, Some(pieces.1.as_ref().to_path_buf())) - } -} - -/// To help with debugging, JSON errors should be paired with a file path. -/// If a path is unavailable, create the error directly: [`DatagenError::Json`]`(err, `[`None`]`)` -impl> From<(serde_json::error::Error, P)> for DatagenError { - fn from(pieces: (serde_json::error::Error, P)) -> Self { - Self::Json(pieces.0, Some(pieces.1.as_ref().to_path_buf())) - } -} - -/// To help with debugging, TOML errors should be paired with a file path. -/// If a path is unavailable, create the error directly: [`DatagenError::Toml`]`(err, `[`None`]`)` -impl> From<(toml::de::Error, P)> for DatagenError { - fn from(pieces: (toml::de::Error, P)) -> Self { - Self::Toml(pieces.0, Some(pieces.1.as_ref().to_path_buf())) - } -} - -/// To help with debugging, string errors should be paired with a locale. -/// If a locale is unavailable, create the error directly: [`DatagenError::Custom`]`(err, `[`None`]`)` -impl> From<(String, L)> for DatagenError { - fn from(pieces: (String, L)) -> Self { - Self::Custom(pieces.0, Some(pieces.1.as_ref().clone())) - } -} - -/// To help with debugging, string errors should be paired with a locale. 
-/// If a locale is unavailable, create the error directly: [`DatagenError::Custom`]`(err, `[`None`]`)` -impl> From<(&'static str, L)> for DatagenError { - fn from(pieces: (&'static str, L)) -> Self { - Self::Custom(pieces.0.to_string(), Some(pieces.1.as_ref().clone())) - } -} - -impl From for DataError { - fn from(err: DatagenError) -> Self { - use DatagenError::*; - match err { - Io(e, Some(path_buf)) => DataError::from(e).with_path(&path_buf), - Io(e, None) => DataError::from(e), - Json(e, Some(path_buf)) => DataError::custom("JSON Parse Error") - .with_error_context(&e) - .with_path(&path_buf), - Json(e, None) => DataError::custom("JSON Parse Error").with_error_context(&e), - Toml(e, Some(path_buf)) => DataError::custom("TOML Parse Error") - .with_error_context(&e) - .with_path(&path_buf), - Toml(e, None) => DataError::custom("TOML Parse Error").with_error_context(&e), - Custom(s, Some(langid)) => DataError::custom("") - .with_display_context(&s) - .with_display_context(&langid), - Custom(s, None) => DataError::custom("").with_display_context(&s), - MissingCldrPaths => { - DataErrorKind::MissingSourceData.with_str_context(CLDR_SOURCE_MARKER) - } - MissingUpropsPath => { - DataErrorKind::MissingSourceData.with_str_context(UPROPS_SOURCE_MARKER) - } - MissingCollPath => { - DataErrorKind::MissingSourceData.with_str_context(COLLATION_SOURCE_MARKER) - } - } - } -} - -const UPROPS_SOURCE_MARKER: &str = "Uprops"; -const CLDR_SOURCE_MARKER: &str = "CLDR"; -const COLLATION_SOURCE_MARKER: &str = "Collation"; /// Identifies errors that are due to missing CLDR data. /// /// See [`datagen`](crate::datagen). -pub fn is_missing_cldr_error(e: DataError) -> bool { - matches!( - e, - DataError { - kind: DataErrorKind::MissingSourceData, - str_context: Some(CLDR_SOURCE_MARKER), - .. - } - ) -} +pub const MISSING_CLDR_ERROR: DataError = DataErrorKind::MissingSourceData.with_str_context("CLDR"); /// Identifies errors that are due to missing Unicode properties data. /// /// See [`datagen`](crate::datagen). -pub fn is_missing_uprops_error(e: DataError) -> bool { - matches!( - e, - DataError { - kind: DataErrorKind::MissingSourceData, - str_context: Some(UPROPS_SOURCE_MARKER), - .. - } - ) +pub const MISSING_UPROPS_ERROR: DataError = + DataErrorKind::MissingSourceData.with_str_context("Uprops"); + +/// Identifies errors that are due to missing collation data. +/// +/// See ['datagen`](crate::datagen). +pub const MISSING_COLLATION_ERROR: DataError = + DataErrorKind::MissingSourceData.with_str_context("Collation"); + +pub(crate) fn data_error_from_toml(other: toml::de::Error) -> DataError { + DataError::custom("Toml deserialize").with_display_context(&other) } diff --git a/provider/datagen/src/lib.rs b/provider/datagen/src/lib.rs index a57f36602db..4160e9f0dc1 100644 --- a/provider/datagen/src/lib.rs +++ b/provider/datagen/src/lib.rs @@ -63,7 +63,7 @@ mod registry; mod source; pub mod transform; -pub use error::{is_missing_cldr_error, is_missing_uprops_error}; +pub use error::{MISSING_CLDR_ERROR, MISSING_COLLATION_ERROR, MISSING_UPROPS_ERROR}; pub use registry::get_all_keys; pub use source::SourceData; @@ -169,8 +169,8 @@ pub enum Out { /// Otherwise, all locales supported by the source data will be generated. /// * `keys`: The keys for which to generate data. See [`get_all_keys()`]. /// * `sources`: The underlying source data. CLDR and/or uprops data can be missing if no -/// requested key requires them. 
Otherwise a error will be returned that can be identified -/// with [`is_missing_cldr_error`] or [`is_missing_uprops_error`]. +/// requested key requires them, otherwise [`MISSING_CLDR_ERROR`] or [`MISSING_UPROPS_ERROR`] +/// will be returned. /// * `out`: The output format and location. See the documentation on [`Out`] /// * `ignore_missing_resource_keys`: some keys are not supported by datagen yet. Using /// all keys will not work unless this option is set. diff --git a/provider/datagen/src/source.rs b/provider/datagen/src/source.rs index be7d8411af1..1323ea2518c 100644 --- a/provider/datagen/src/source.rs +++ b/provider/datagen/src/source.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use icu_codepointtrie::TrieType; use icu_provider::DataError; use std::path::{Path, PathBuf}; @@ -85,18 +84,16 @@ impl SourceData { /// Paths to CLDR source data. pub(crate) fn get_cldr_paths(&self) -> Result<&CldrPaths, DataError> { - Ok(self - .cldr_paths + self.cldr_paths .as_ref() - .ok_or(DatagenError::MissingCldrPaths)?) + .ok_or(crate::error::MISSING_CLDR_ERROR) } /// Path to Unicode Properties source data. pub(crate) fn get_uprops_root(&self) -> Result<&Path, DataError> { - Ok(self - .uprops_root + self.uprops_root .as_deref() - .ok_or(DatagenError::MissingUpropsPath)?) + .ok_or(crate::error::MISSING_UPROPS_ERROR) } /// Path to segmenter data. @@ -113,10 +110,9 @@ impl SourceData { /// Path to collation data. #[cfg(feature = "experimental")] pub(crate) fn get_coll_root(&self) -> Result<&Path, DataError> { - Ok(self - .coll_root + self.coll_root .as_deref() - .ok_or(DatagenError::MissingCollPath)?) + .ok_or(crate::error::MISSING_COLLATION_ERROR) } } diff --git a/provider/datagen/src/transform/cldr/calendar/japanese.rs b/provider/datagen/src/transform/cldr/calendar/japanese.rs index ae3c04abd3c..7b7f63cd8d9 100644 --- a/provider/datagen/src/transform/cldr/calendar/japanese.rs +++ b/provider/datagen/src/transform/cldr/calendar/japanese.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::open_reader; use crate::SourceData; @@ -61,10 +60,10 @@ impl ResourceProvider for JapaneseErasProvider { let era_names: cldr_serde::ca::Resource = serde_json::from_reader(open_reader(&era_names_path)?) - .map_err(|e| DatagenError::from((e, era_names_path)))?; + .map_err(|e| DataError::from(e).with_path_context(&era_names_path))?; let era_dates: cldr_serde::japanese::Resource = serde_json::from_reader(open_reader(&era_dates_path)?) - .map_err(|e| DatagenError::from((e, era_dates_path)))?; + .map_err(|e| DataError::from(e).with_path_context(&era_dates_path))?; let era_name_map = &era_names .main @@ -96,28 +95,18 @@ impl ResourceProvider for JapaneseErasProvider { let date = &era_dates_map .get(era_id) .ok_or_else(|| { - DatagenError::Custom( - format!( - "calendarData.json contains no data for japanese era index {}", - era_id - ), - None, - ) + DataError::custom("calendarData.json is missing data for a japanese era") + .with_display_context(&format!("era index {}", era_id)) })? 
.start; let start_date = EraStartDate::from_str(date).map_err(|_| { - DatagenError::Custom( - format!( - "calendarData.json contains unparseable data for japanese era index {}", - era_id - ), - None, - ) + DataError::custom("calendarData.json contains unparseable data for a japanese era") + .with_display_context(&format!("era index {}", era_id)) })?; let code = era_to_code(era_name, start_date.year) - .map_err(|e| DatagenError::Custom(e, None))?; + .map_err(|e| DataError::custom("Era codes").with_display_context(&e))?; if start_date.year >= 1868 { ret.dates_to_eras .to_mut() diff --git a/provider/datagen/src/transform/cldr/datetime/mod.rs b/provider/datagen/src/transform/cldr/datetime/mod.rs index 3f9fa5e082c..72455f4aa59 100644 --- a/provider/datagen/src/transform/cldr/datetime/mod.rs +++ b/provider/datagen/src/transform/cldr/datetime/mod.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::{get_langid_subdirectories, get_langid_subdirectory, open_reader}; use crate::SourceData; @@ -80,8 +79,7 @@ macro_rules! impl_resource_provider { .join(&format!("ca-{}.json", cldr_cal)); let mut resource: cldr_serde::ca::Resource = - serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + serde_json::from_reader(open_reader(&path)?).map_err(|e| DataError::from(e).with_path_context(&path))?; self.data.insert( req.options.clone(), diff --git a/provider/datagen/src/transform/cldr/datetime/week_data.rs b/provider/datagen/src/transform/cldr/datetime/week_data.rs index ec935d55158..7f6c3171b9d 100644 --- a/provider/datagen/src/transform/cldr/datetime/week_data.rs +++ b/provider/datagen/src/transform/cldr/datetime/week_data.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde::{ self, week_data::{Territory, DEFAULT_TERRITORY}, @@ -42,7 +41,7 @@ impl WeekDataProvider { .join("supplemental/weekData.json"); let resource: cldr_serde::week_data::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; let week_data = resource.supplemental.week_data; *self.data.write().unwrap() = Some(( CalendarInfo { diff --git a/provider/datagen/src/transform/cldr/decimal/mod.rs b/provider/datagen/src/transform/cldr/decimal/mod.rs index f83166fc01f..bce53af4521 100644 --- a/provider/datagen/src/transform/cldr/decimal/mod.rs +++ b/provider/datagen/src/transform/cldr/decimal/mod.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::{get_langid_subdirectories, get_langid_subdirectory, open_reader}; use crate::SourceData; @@ -84,7 +83,7 @@ impl ResourceProvider for NumbersProvider { .ok_or_else(|| DataErrorKind::MissingLocale.into_error())? .join("numbers.json"); serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))? + .map_err(|e| DataError::from(e).with_path_context(&path))? }; #[allow(clippy::expect_used)] // TODO(#1668) Clippy exceptions need docs or fixing. 
@@ -96,8 +95,9 @@ impl ResourceProvider for NumbersProvider { .numbers; let nsname = numbers.default_numbering_system; - let mut result = DecimalSymbolsV1::try_from(numbers) - .map_err(|s| DatagenError::Custom(s.to_string(), Some(langid.clone())))?; + let mut result = DecimalSymbolsV1::try_from(numbers).map_err(|s| { + DataError::custom("Could not create decimal symbols").with_display_context(&s) + })?; #[allow(clippy::unwrap_used)] // TODO(#1668) Clippy exceptions need docs or fixing. if self.cldr_numbering_systems_data.read().unwrap().is_none() { @@ -109,7 +109,7 @@ impl ResourceProvider for NumbersProvider { .join("numberingSystems.json"); let resource: cldr_serde::numbering_systems::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; let _ = self .cldr_numbering_systems_data .write() @@ -120,10 +120,8 @@ impl ResourceProvider for NumbersProvider { result.digits = self .get_digits_for_numbering_system(nsname) .ok_or_else(|| { - DatagenError::Custom( - format!("Could not process numbering system: {:?}", nsname), - Some(langid.clone()), - ) + DataError::custom("Could not process numbering system") + .with_display_context(&nsname) })?; let metadata = DataResponseMetadata::default(); diff --git a/provider/datagen/src/transform/cldr/list/mod.rs b/provider/datagen/src/transform/cldr/list/mod.rs index ab02fd067e6..f9ffd32a128 100644 --- a/provider/datagen/src/transform/cldr/list/mod.rs +++ b/provider/datagen/src/transform/cldr/list/mod.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::{get_langid_subdirectories, get_langid_subdirectory, open_reader}; use crate::transform::uprops::EnumeratedPropertyCodePointTrieProvider; @@ -40,7 +39,7 @@ impl>> ResourcePro .ok_or_else(|| DataErrorKind::MissingLocale.into_error())? .join("listPatterns.json"); serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))? + .map_err(|e| DataError::from(e).with_path_context(&path))? }; let data = &resource diff --git a/provider/datagen/src/transform/cldr/locale_canonicalizer/aliases.rs b/provider/datagen/src/transform/cldr/locale_canonicalizer/aliases.rs index 0a593ebe74e..f5ce5d4cead 100644 --- a/provider/datagen/src/transform/cldr/locale_canonicalizer/aliases.rs +++ b/provider/datagen/src/transform/cldr/locale_canonicalizer/aliases.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::open_reader; use crate::SourceData; @@ -42,7 +41,7 @@ impl ResourceProvider for AliasesProvider { .join("supplemental") .join("aliases.json"); let data: cldr_serde::aliases::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; let metadata = DataResponseMetadata::default(); // TODO(#1109): Set metadata.data_langid correctly. 
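Editorial note: the transform hunks above and below all replace `DatagenError` with a plain `DataError` plus the context helpers introduced earlier in this patch (`with_path_context` for file paths, `with_display_context` for underlying errors). A minimal sketch of that pattern follows; the helper name `read_json_len` and its direct use of `serde_json` are assumptions for illustration only, not part of the patch.

```rust
use icu_provider::DataError;
use std::path::Path;

// Illustrative sketch: wrap the underlying error in a DataError, then
// attach the most useful context before propagating it.
fn read_json_len(path: &Path) -> Result<usize, DataError> {
    let text = std::fs::read_to_string(path)
        // I/O failures keep the offending path for logging.
        .map_err(|e| DataError::from(e).with_path_context(path))?;
    let value: serde_json::Value = serde_json::from_str(&text)
        // Parse failures become a custom DataError carrying the parser message.
        .map_err(|e| DataError::custom("JSON deserialize").with_display_context(&e))?;
    Ok(value.to_string().len())
}
```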
diff --git a/provider/datagen/src/transform/cldr/locale_canonicalizer/likely_subtags.rs b/provider/datagen/src/transform/cldr/locale_canonicalizer/likely_subtags.rs index 27c7e26dbf7..05706bbe20f 100644 --- a/provider/datagen/src/transform/cldr/locale_canonicalizer/likely_subtags.rs +++ b/provider/datagen/src/transform/cldr/locale_canonicalizer/likely_subtags.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::open_reader; use crate::SourceData; @@ -44,7 +43,7 @@ impl ResourceProvider for LikelySubtagsProvider { .join("likelySubtags.json"); let data: cldr_serde::likely_subtags::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; let metadata = DataResponseMetadata::default(); // TODO(#1109): Set metadata.data_langid correctly. diff --git a/provider/datagen/src/transform/cldr/plurals/mod.rs b/provider/datagen/src/transform/cldr/plurals/mod.rs index f93fe70e328..b7c94061e55 100644 --- a/provider/datagen/src/transform/cldr/plurals/mod.rs +++ b/provider/datagen/src/transform/cldr/plurals/mod.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::reader::open_reader; use crate::SourceData; @@ -49,7 +48,7 @@ impl PluralsProvider { .join("plurals.json"); let data: cldr_serde::plurals::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; let _ = self .cardinal_rules .write() @@ -72,7 +71,7 @@ impl PluralsProvider { .join("ordinals.json"); let data: cldr_serde::plurals::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; let _ = self .ordinal_rules .write() diff --git a/provider/datagen/src/transform/cldr/time_zones/mod.rs b/provider/datagen/src/transform/cldr/time_zones/mod.rs index d71ac221a9a..5c8622832c9 100644 --- a/provider/datagen/src/transform/cldr/time_zones/mod.rs +++ b/provider/datagen/src/transform/cldr/time_zones/mod.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::transform::cldr::cldr_serde; use crate::transform::cldr::cldr_serde::time_zones::time_zone_names::TimeZoneNames; use crate::transform::cldr::cldr_serde::time_zones::CldrTimeZonesData; @@ -61,7 +60,7 @@ macro_rules! impl_resource_provider { .join("timeZoneNames.json"); let mut resource: cldr_serde::time_zones::time_zone_names::Resource = serde_json::from_reader(open_reader(&path)?) - .map_err(|e| DatagenError::from((e, path)))?; + .map_err(|e| DataError::from(e).with_path_context(&path))?; self.time_zone_names_data.insert( langid.clone(), Box::new( @@ -86,7 +85,7 @@ macro_rules! impl_resource_provider { let resource: cldr_serde::time_zones::bcp47_tzid::Resource = serde_json::from_reader(open_reader(&bcp47_time_zone_path)?) 
- .map_err(|e| DatagenError::from((e, bcp47_time_zone_path)))?; + .map_err(|e| DataError::from(e).with_path_context(&bcp47_time_zone_path))?; let r = resource.keyword.u.time_zones.values; let mut data_guard = self.bcp47_tzid_data.write().unwrap(); @@ -109,7 +108,7 @@ macro_rules! impl_resource_provider { let resource: cldr_serde::time_zones::meta_zones::Resource = serde_json::from_reader(open_reader(&meta_zone_id_path)?) - .map_err(|e| DatagenError::from((e, meta_zone_id_path)))?; + .map_err(|e| DataError::from(e).with_path_context(&meta_zone_id_path))?; let r = resource.supplemental.meta_zones.meta_zone_ids.0; let mut data_guard = self.meta_zone_id_data.write().unwrap(); diff --git a/provider/datagen/src/transform/collator/mod.rs b/provider/datagen/src/transform/collator/mod.rs index ade30bbb8c1..0073e0e4a07 100644 --- a/provider/datagen/src/transform/collator/mod.rs +++ b/provider/datagen/src/transform/collator/mod.rs @@ -5,7 +5,6 @@ //! This module transforms collation-related TOML files created by //! `genrb -X` in the ICU4C repo to ICU4X-internal data structures. -use crate::error::DatagenError; use crate::transform::reader::get_dir_contents; use crate::transform::reader::read_path_to_string; use crate::SourceData; @@ -103,7 +102,7 @@ macro_rules! collation_provider { /// A data provider reading from .toml files produced by the ICU4C genrb tool. impl $provider { - fn load_data_if_not_loaded(&self) -> Result<(), DatagenError> { + fn load_data_if_not_loaded(&self) -> Result<(), DataError> { if self.data.read().unwrap().is_some() { return Ok(()); } @@ -112,17 +111,14 @@ macro_rules! collation_provider { return Ok(()); } - let root_dir = self - .source - .get_coll_root() - .map_err(|e| DatagenError::Custom(e.to_string(), None))?; + let root_dir = self.source.get_coll_root()?; let mut data: HashMap = HashMap::new(); for path in get_dir_contents(&root_dir)? { let stem_bytes = if let Some(stem_bytes) = path .file_stem() .and_then(|p| p.to_str()) - .ok_or_else(|| DatagenError::Custom("Invalid file name".to_string(), None))? + .ok_or_else(|| DataError::custom("Invalid file name"))? .as_bytes() .strip_suffix($suffix) { @@ -130,12 +126,12 @@ macro_rules! 
collation_provider { } else { continue; }; - let mut key = String::from_utf8(stem_bytes.to_vec()).map_err(|_| { - DatagenError::Custom("Non-UTF-8 file name".to_string(), None) - })?; + let mut key = String::from_utf8(stem_bytes.to_vec()) + .map_err(|_| DataError::custom("Non-UTF-8 file name"))?; let toml_str = read_path_to_string(&path)?; - let toml_obj: $serde_struct = toml::from_str(&toml_str) - .map_err(|e| DatagenError::Custom(e.to_string(), None))?; + let toml_obj: $serde_struct = toml::from_str(&toml_str).map_err(|e| { + crate::error::data_error_from_toml(e).with_path_context(&path) + })?; key.make_ascii_lowercase(); data.insert(key, toml_obj); } @@ -260,7 +256,7 @@ collation_provider!( b"_data", icu_collator::provider::CollationDataV1 { trie: CodePointTrie::::try_from(&toml_data.trie) - .map_err(|e| DatagenError::Custom(e.to_string(), None))?, + .map_err(|e| DataError::custom("trie conversion").with_display_context(&e))?, contexts: ZeroVec::alloc_from_slice(&toml_data.contexts), ce32s: ZeroVec::alloc_from_slice(&toml_data.ce32s), ces: toml_data.ces.iter().map(|i| *i as u64).collect(), diff --git a/provider/datagen/src/transform/reader.rs b/provider/datagen/src/transform/reader.rs index a95da99e873..3102fde351b 100644 --- a/provider/datagen/src/transform/reader.rs +++ b/provider/datagen/src/transform/reader.rs @@ -2,8 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use icu_locid::LanguageIdentifier; +use icu_provider::DataError; use std::fs; use std::fs::File; use std::io::BufReader; @@ -12,26 +12,30 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; /// Helper function to open a file and return failures as a crate error. -pub fn open_reader(path: &Path) -> Result, DatagenError> { +pub fn open_reader(path: &Path) -> Result, DataError> { log::trace!("Reading: {:?}", path); File::open(&path) .map(BufReader::new) - .map_err(|e| (e, path).into()) + .map_err(|e| DataError::from(e).with_path_context(path)) } /// Read the contents of the file at `path` and return it as a `String`. -pub fn read_path_to_string(path: &Path) -> Result { +pub fn read_path_to_string(path: &Path) -> Result { let mut reader = open_reader(path)?; let mut buffer = String::new(); - reader.read_to_string(&mut buffer).map_err(|e| (e, path))?; + reader + .read_to_string(&mut buffer) + .map_err(|e| DataError::from(e).with_path_context(path))?; Ok(buffer) } /// Helper function which returns a sorted list of the contents of a directory. -pub fn get_dir_contents(root: &Path) -> Result, DatagenError> { +pub fn get_dir_contents(root: &Path) -> Result, DataError> { let mut result = vec![]; - for entry in fs::read_dir(root).map_err(|e| (e, root))? { - let path = entry.map_err(|e| (e, root))?.path(); + for entry in fs::read_dir(root).map_err(|e| DataError::from(e).with_path_context(root))? { + let path = entry + .map_err(|e| DataError::from(e).with_path_context(&root))? + .path(); result.push(path); } result.sort(); @@ -40,10 +44,10 @@ pub fn get_dir_contents(root: &Path) -> Result, DatagenError> { fn get_langid_subdirectories_internal( root: &Path, -) -> Result, DatagenError> { +) -> Result, DataError> { let mut result = vec![]; - for entry in fs::read_dir(root).map_err(|e| (e, root))? { - let entry = entry.map_err(|e| (e, root))?; + for entry in fs::read_dir(root).map_err(|e| DataError::from(e).with_path_context(&root))? 
{ + let entry = entry.map_err(|e| DataError::from(e).with_path_context(&root))?; let path = entry.path(); result.push(path); } @@ -59,7 +63,7 @@ fn get_langid_subdirectories_internal( /// Helper function which returns an unsorted list of langids for which subdirectories exist. pub fn get_langid_subdirectories( root: &Path, -) -> Result, DatagenError> { +) -> Result, DataError> { get_langid_subdirectories_internal(root).map(|iter| iter.map(|(l, _)| l)) } @@ -67,7 +71,7 @@ pub fn get_langid_subdirectories( pub fn get_langid_subdirectory( root: &Path, langid: &LanguageIdentifier, -) -> Result, DatagenError> { +) -> Result, DataError> { get_langid_subdirectories_internal(root).map(|mut iter| { iter.find(|(langid2, _)| langid2 == langid) .map(|(_, path)| path) diff --git a/provider/datagen/src/transform/uprops/canonical_decompositions.rs b/provider/datagen/src/transform/uprops/canonical_decompositions.rs index a8eb2a90e71..a9a1f7eea11 100644 --- a/provider/datagen/src/transform/uprops/canonical_decompositions.rs +++ b/provider/datagen/src/transform/uprops/canonical_decompositions.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::DatagenError; use crate::SourceData; use icu_codepointtrie::CodePointTrie; use std::convert::TryFrom; @@ -45,8 +44,8 @@ impl ResourceProvider for CanonicalDecomposi let path_buf = self.source.get_uprops_root()?.join("decompositions.toml"); let path: &Path = &path_buf; let toml_str = read_path_to_string(path)?; - let toml_obj: CanonicalDecompositionData = - toml::from_str(&toml_str).map_err(|e| DatagenError::from((e, path)))?; + let toml_obj: CanonicalDecompositionData = toml::from_str(&toml_str) + .map_err(|e| crate::error::data_error_from_toml(e).with_path_context(path))?; *self.data.write().unwrap() = Some(toml_obj); } @@ -60,12 +59,13 @@ impl ResourceProvider for CanonicalDecomposi } let uniset = builder.build(); - let trie = CodePointTrie::::try_from(&toml_data.trie); + let trie = CodePointTrie::::try_from(&toml_data.trie) + .map_err(|e| DataError::custom("trie conversion").with_display_context(&e))?; Ok(DataResponse { metadata: DataResponseMetadata::default(), payload: Some(DataPayload::from_owned(CanonicalDecompositionDataV1 { - trie: trie.map_err(|e| DatagenError::Custom(e.to_string(), None))?, + trie, scalars16: ZeroVec::alloc_from_slice(&toml_data.scalars16), scalars32: ZeroVec::alloc_from_slice(&toml_data.scalars32), decomposition_starts_with_non_starter: uniset, diff --git a/provider/datagen/src/transform/uprops/casemapping.rs b/provider/datagen/src/transform/uprops/casemapping.rs index b9558b682ab..05bcfd0c272 100644 --- a/provider/datagen/src/transform/uprops/casemapping.rs +++ b/provider/datagen/src/transform/uprops/casemapping.rs @@ -3,7 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use crate::transform::uprops::uprops_serde; -use crate::{error::DatagenError, SourceData}; +use crate::SourceData; use icu_casemapping::provider::{CaseMappingV1, CaseMappingV1Marker}; use icu_casemapping::CaseMappingInternals; use icu_codepointtrie::toml::CodePointDataSlice; @@ -31,9 +31,10 @@ impl ResourceProvider for CaseMappingDataProvider { _req: &DataRequest, ) -> Result, DataError> { let path = self.source.get_uprops_root()?.join("ucase.toml"); - let toml_str = fs::read_to_string(&path).map_err(|e| DatagenError::from((e, &path)))?; - let toml: uprops_serde::case::Main = - toml::from_str(&toml_str).map_err(|e| DatagenError::from((e, &path)))?; + let toml_str = + fs::read_to_string(&path).map_err(|e| DataError::from(e).with_path_context(&path))?; + let toml: uprops_serde::case::Main = toml::from_str(&toml_str) + .map_err(|e| crate::error::data_error_from_toml(e).with_path_context(&path))?; let trie_data = &toml.ucase.code_point_trie; let trie_header = CodePointTrieHeader::try_from(trie_data).map_err(|e| { diff --git a/provider/datagen/src/transform/uprops/uprops_helpers.rs b/provider/datagen/src/transform/uprops/uprops_helpers.rs index 341f2897420..81b9191ae14 100644 --- a/provider/datagen/src/transform/uprops/uprops_helpers.rs +++ b/provider/datagen/src/transform/uprops/uprops_helpers.rs @@ -4,25 +4,25 @@ use crate::transform::reader::*; -use crate::error::DatagenError; use crate::transform::uprops::uprops_serde; +use icu_provider::DataError; use std::collections::HashMap; use std::path::Path; pub type TomlEnumerated = HashMap; pub type TomlBinary = HashMap; -pub fn load_binary_from_dir(root_dir: &Path) -> Result { +pub fn load_binary_from_dir(root_dir: &Path) -> Result { let mut result = HashMap::new(); for path in get_dir_contents(root_dir)? { let key: String = path .file_stem() .and_then(|p| p.to_str()) - .ok_or_else(|| DatagenError::Custom(format!("Invalid file name: {:?}", path), None))? + .ok_or_else(|| DataError::custom("Invalid file name").with_path_context(&path))? .to_string(); let toml_str = read_path_to_string(&path)?; - let toml_obj: uprops_serde::binary::Main = - toml::from_str(&toml_str).map_err(|e| (e, path))?; + let toml_obj: uprops_serde::binary::Main = toml::from_str(&toml_str) + .map_err(|e| crate::error::data_error_from_toml(e).with_path_context(&path))?; if let Some(v) = toml_obj.binary_property.into_iter().next() { result.insert(key, v); } @@ -30,17 +30,17 @@ pub fn load_binary_from_dir(root_dir: &Path) -> Result Ok(result) } -pub fn load_enumerated_from_dir(root_dir: &Path) -> Result { +pub fn load_enumerated_from_dir(root_dir: &Path) -> Result { let mut result = HashMap::new(); for path in get_dir_contents(root_dir)? { let key: String = path .file_stem() .and_then(|p| p.to_str()) - .ok_or_else(|| DatagenError::Custom(format!("Invalid file name: {:?}", path), None))? + .ok_or_else(|| DataError::custom("Invalid file name").with_path_context(&path))? 
.to_string(); let toml_str = read_path_to_string(&path)?; - let toml_obj: uprops_serde::enumerated::Main = - toml::from_str(&toml_str).map_err(|e| (e, path))?; + let toml_obj: uprops_serde::enumerated::Main = toml::from_str(&toml_str) + .map_err(|e| crate::error::data_error_from_toml(e).with_path_context(&path))?; if let Some(v) = toml_obj.enum_property.into_iter().next() { result.insert(key, v); } @@ -50,24 +50,19 @@ pub fn load_enumerated_from_dir(root_dir: &Path) -> Result Result { +) -> Result { let mut path = root_dir.join("scx"); path.set_extension("toml"); let toml_str = read_path_to_string(&path)?; - let toml_obj: uprops_serde::script_extensions::Main = - toml::from_str(&toml_str).map_err(|e| (e, &path))?; + let toml_obj: uprops_serde::script_extensions::Main = toml::from_str(&toml_str) + .map_err(|e| crate::error::data_error_from_toml(e).with_path_context(&path))?; toml_obj .script_extensions .into_iter() .next() .ok_or_else(|| { - DatagenError::Custom( - format!( - "Could not parse Script_Extensions data from TOML {:?}", - path - ), - None, - ) + DataError::custom("Could not parse Script_Extensions data from TOML") + .with_path_context(&path) }) } diff --git a/provider/fs/Cargo.toml b/provider/fs/Cargo.toml index 3f0a3f3f7c3..bc8334f11d2 100644 --- a/provider/fs/Cargo.toml +++ b/provider/fs/Cargo.toml @@ -44,7 +44,6 @@ log = { version = "0.4", optional = true } serde_json = { version = "1.0", optional = true } bincode = { version = "1.3", optional = true } postcard = { version = "0.7", features = ["use-std"], optional = true } -erased-serde = { version = "0.3", default-features = false, features = ["alloc"], optional = true } crlify = { version = "1", path = "../../utils/crlify", optional = true } [dev-dependencies] @@ -60,7 +59,6 @@ export = [ "serde_json", "bincode", "postcard", - "erased-serde", "crlify", "icu_provider/datagen" ] diff --git a/provider/fs/src/error.rs b/provider/fs/src/error.rs deleted file mode 100644 index 303d787631b..00000000000 --- a/provider/fs/src/error.rs +++ /dev/null @@ -1,99 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use displaydoc::Display; -use icu_provider::DataError; -use std::path::{Path, PathBuf}; - -#[derive(Display, Debug)] -#[non_exhaustive] -pub enum Error { - #[displaydoc("{0}: {1:?}")] - Io(std::io::Error, Option), - #[displaydoc("{0}")] - DataProvider(DataError), - #[displaydoc("Deserializer error: {0}: {1:?}")] - Deserializer(String, Option), - #[cfg(feature = "export")] - #[displaydoc("Serializer error: {0}: {1:?}")] - Serializer(erased_serde::Error, Option), -} - -impl std::error::Error for Error {} - -impl From for Error { - fn from(e: DataError) -> Self { - Error::DataProvider(e) - } -} - -impl From for Error { - fn from(e: icu_provider::serde::Error) -> Self { - Error::DataProvider(e.into()) - } -} - -/// To help with debugging, I/O errors should be paired with a file path. -/// If a path is unavailable, create the error directly: [`Error::Io`]`(err, `[`None`]`)` -impl> From<(std::io::Error, P)> for Error { - fn from(pieces: (std::io::Error, P)) -> Self { - Self::Io(pieces.0, Some(pieces.1.as_ref().to_path_buf())) - } -} - -/// To help with debugging, JSON errors should be paired with a file path. 
-/// If a path is unavailable, create the error directly: [`Error::Deserializer`]`(err, `[`None`]`)` -impl> From<(serde_json_core::de::Error, P)> for Error { - fn from(pieces: (serde_json_core::de::Error, P)) -> Self { - Self::Deserializer( - format!("{}", pieces.0), - Some(pieces.1.as_ref().to_path_buf()), - ) - } -} - -#[cfg(feature = "export")] -impl> From<(crate::export::serializers::Error, P)> for Error { - fn from(pieces: (crate::export::serializers::Error, P)) -> Self { - use crate::export::serializers::Error; - let path: Option = Some(pieces.1.as_ref().to_path_buf()); - match pieces.0 { - Error::Io(err) => Self::Io(err, path), - Error::Serializer(err) => Self::Serializer(err, path), - } - } -} - -impl Error { - /// Conversion from [`serializers::Error`](crate::export::serializers::Error) when the path is unavailable - #[cfg(feature = "export")] - pub fn from_serializers_error(err: crate::export::serializers::Error) -> Self { - use crate::export::serializers::Error; - match err { - Error::Io(err) => Self::Io(err, None), - Error::Serializer(err) => Self::Serializer(err, None), - } - } -} - -impl From for DataError { - fn from(err: Error) -> Self { - use Error::*; - match err { - Io(e, Some(path_buf)) => DataError::from(e).with_path(&path_buf), - Io(e, None) => DataError::from(e), - DataProvider(e) => e, - Deserializer(s, Some(path_buf)) => DataError::custom("FS: Deserializer") - .with_display_context(&s) - .with_path(&path_buf), - Deserializer(s, None) => DataError::custom("FS: Deserializer").with_display_context(&s), - #[cfg(feature = "export")] - Serializer(e, Some(path_buf)) => DataError::custom("FS: Serializer") - .with_error_context(&e) - .with_path(&path_buf), - #[cfg(feature = "export")] - Serializer(e, None) => DataError::custom("FS: Serializer").with_display_context(&e), - } - } -} diff --git a/provider/fs/src/export/fs_exporter.rs b/provider/fs/src/export/fs_exporter.rs index 549d573542e..e8c156535df 100644 --- a/provider/fs/src/export/fs_exporter.rs +++ b/provider/fs/src/export/fs_exporter.rs @@ -2,16 +2,13 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
-use super::serializers::{json, AbstractSerializer}; -use crate::error::Error; +use super::serializers::AbstractSerializer; use crate::manifest::Manifest; -use crate::manifest::MANIFEST_FILE; use icu_provider::export::DataExporter; use icu_provider::prelude::*; use icu_provider::serde::SerializeMarker; use serde::{Deserialize, Serialize}; use std::fs; -use std::ops::Deref; use std::path::PathBuf; use writeable::Writeable; @@ -57,38 +54,24 @@ impl FilesystemExporter { pub fn try_new( serializer: Box, options: ExporterOptions, - ) -> Result { + ) -> Result { let result = FilesystemExporter { root: options.root, - manifest: Manifest { - buffer_format: serializer.get_buffer_format(), - }, + manifest: Manifest::for_format(serializer.get_buffer_format())?, serializer, }; match options.overwrite { - OverwriteOption::CheckEmpty => { - if result.root.exists() { - fs::remove_dir(&result.root).map_err(|e| (e, &result.root))?; - } + OverwriteOption::CheckEmpty if result.root.exists() => fs::remove_dir(&result.root), + OverwriteOption::RemoveAndReplace if result.root.exists() => { + fs::remove_dir_all(&result.root) } - OverwriteOption::RemoveAndReplace => { - if result.root.exists() { - fs::remove_dir_all(&result.root).map_err(|e| (e, &result.root))?; - } - } - }; - fs::create_dir_all(&result.root).map_err(|e| (e, &result.root))?; + _ => Ok(()), + } + .and_then(|_| fs::create_dir_all(&result.root)) + .map_err(|e| DataError::from(e).with_path_context(&result.root))?; - let manifest_path = result.root.join(MANIFEST_FILE); - let mut manifest_file = - fs::File::create(&manifest_path).map_err(|e| (e, &manifest_path))?; - let manifest_serializer = json::Serializer::new(json::Options { - style: json::StyleOption::Pretty, - }); - manifest_serializer - .serialize(&result.manifest, &mut manifest_file) - .map_err(|e| (e, manifest_path))?; + result.manifest.write(&result.root)?; Ok(result) } } @@ -105,15 +88,17 @@ impl DataExporter for FilesystemExporter { let mut path_buf = self.root.clone(); path_buf.push(&*key.write_to_string()); path_buf.push(&*options.write_to_string()); - path_buf.set_extension(self.manifest.get_file_extension()); + path_buf.set_extension(self.manifest.file_extension); if let Some(parent_dir) = path_buf.parent() { - fs::create_dir_all(&parent_dir).map_err(|e| Error::from((e, parent_dir)))?; + fs::create_dir_all(&parent_dir) + .map_err(|e| DataError::from(e).with_path_context(&parent_dir))?; } - let mut file = fs::File::create(&path_buf).map_err(|e| Error::from((e, &path_buf)))?; + let mut file = fs::File::create(&path_buf) + .map_err(|e| DataError::from(e).with_path_context(&path_buf))?; self.serializer - .serialize(obj.get().deref(), &mut file) - .map_err(|e| Error::from((e, &path_buf)))?; + .serialize(obj, &mut file) + .map_err(|e| e.with_path_context(&path_buf))?; Ok(()) } } diff --git a/provider/fs/src/export/serializers/bincode.rs b/provider/fs/src/export/serializers/bincode.rs index f3e6ee5adab..5501d52fc38 100644 --- a/provider/fs/src/export/serializers/bincode.rs +++ b/provider/fs/src/export/serializers/bincode.rs @@ -3,9 +3,10 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::AbstractSerializer; -use super::Error; use bincode::config::Options as _; use icu_provider::buf::BufferFormat; +use icu_provider::prelude::*; +use icu_provider::serde::SerializeMarker; use std::io; /// A serializer for Bincode. 
@@ -20,14 +21,12 @@ pub struct Options; impl AbstractSerializer for Serializer { fn serialize( &self, - obj: &dyn erased_serde::Serialize, + obj: DataPayload, mut sink: &mut dyn io::Write, - ) -> Result<(), Error> { - obj.erased_serialize(&mut ::erase( - &mut bincode::Serializer::new( - &mut sink, - bincode::config::DefaultOptions::new().with_fixint_encoding(), - ), + ) -> Result<(), DataError> { + obj.serialize(&mut bincode::Serializer::new( + &mut sink, + bincode::config::DefaultOptions::new().with_fixint_encoding(), ))?; Ok(()) } diff --git a/provider/fs/src/export/serializers/json.rs b/provider/fs/src/export/serializers/json.rs index ddb71ebace3..8c02872ecb8 100644 --- a/provider/fs/src/export/serializers/json.rs +++ b/provider/fs/src/export/serializers/json.rs @@ -3,8 +3,9 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::AbstractSerializer; -use super::Error; use icu_provider::buf::BufferFormat; +use icu_provider::prelude::*; +use icu_provider::serde::SerializeMarker; use std::io::{self, Write}; #[non_exhaustive] @@ -40,22 +41,14 @@ impl Default for Options { impl AbstractSerializer for Serializer { fn serialize( &self, - obj: &dyn erased_serde::Serialize, + obj: DataPayload, sink: &mut dyn io::Write, - ) -> Result<(), Error> { + ) -> Result<(), DataError> { let mut sink = crlify::BufWriterWithLineEndingFix::new(sink); match self.style { - StyleOption::Compact => { - obj.erased_serialize(&mut ::erase( - &mut serde_json::Serializer::new(&mut sink), - ))?; - } - StyleOption::Pretty => { - obj.erased_serialize(&mut ::erase( - &mut serde_json::Serializer::pretty(&mut sink), - ))?; - } - }; + StyleOption::Compact => obj.serialize(&mut serde_json::Serializer::new(&mut sink)), + StyleOption::Pretty => obj.serialize(&mut serde_json::Serializer::pretty(&mut sink)), + }?; // Write an empty line at the end of the document writeln!(sink)?; Ok(()) diff --git a/provider/fs/src/export/serializers/mod.rs b/provider/fs/src/export/serializers/mod.rs index 35c8be6badc..4744045ae6a 100644 --- a/provider/fs/src/export/serializers/mod.rs +++ b/provider/fs/src/export/serializers/mod.rs @@ -6,42 +6,19 @@ pub mod bincode; pub mod json; pub mod postcard; -use displaydoc::Display; use icu_provider::buf::BufferFormat; +use icu_provider::prelude::*; +use icu_provider::serde::SerializeMarker; use std::io; -/// An Error type specifically for the [`Serializer`](serde::Serializer) that doesn't carry filenames -#[derive(Display, Debug)] -#[non_exhaustive] -pub enum Error { - #[displaydoc("{0}")] - Io(io::Error), - #[displaydoc("{0}")] - Serializer(erased_serde::Error), -} - -impl std::error::Error for Error {} - -impl From for Error { - fn from(e: io::Error) -> Self { - Error::Io(e) - } -} - -impl From for Error { - fn from(e: erased_serde::Error) -> Self { - Error::Serializer(e) - } -} - /// A simple serializer trait that works on whole objects. pub trait AbstractSerializer { /// Serializes an object to a sink. fn serialize( &self, - obj: &dyn erased_serde::Serialize, + obj: DataPayload, sink: &mut dyn io::Write, - ) -> Result<(), Error>; + ) -> Result<(), DataError>; /// Gets the buffer format currently being serialized. 
fn get_buffer_format(&self) -> BufferFormat; diff --git a/provider/fs/src/export/serializers/postcard.rs b/provider/fs/src/export/serializers/postcard.rs index 90204b71c86..f8b62b9037b 100644 --- a/provider/fs/src/export/serializers/postcard.rs +++ b/provider/fs/src/export/serializers/postcard.rs @@ -3,8 +3,9 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::AbstractSerializer; -use super::Error; use icu_provider::buf::BufferFormat; +use icu_provider::prelude::*; +use icu_provider::serde::SerializeMarker; use std::io; /// A serializer for Postcard. @@ -19,13 +20,13 @@ pub struct Options; impl AbstractSerializer for Serializer { fn serialize( &self, - obj: &dyn erased_serde::Serialize, + obj: DataPayload, sink: &mut dyn io::Write, - ) -> Result<(), Error> { + ) -> Result<(), DataError> { let mut serializer = postcard::Serializer { output: postcard::flavors::StdVec(Vec::new()), }; - obj.erased_serialize(&mut ::erase(&mut serializer))?; + obj.serialize(&mut serializer)?; sink.write_all(&serializer.output.0)?; Ok(()) } diff --git a/provider/fs/src/fs_data_provider.rs b/provider/fs/src/fs_data_provider.rs index e8afb40295e..da28440ce36 100644 --- a/provider/fs/src/fs_data_provider.rs +++ b/provider/fs/src/fs_data_provider.rs @@ -2,16 +2,12 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::error::Error; use crate::manifest::Manifest; -use crate::manifest::MANIFEST_FILE; use icu_provider::prelude::*; -use icu_provider::serde::*; -use writeable::Writeable; - use std::fmt::Debug; use std::fs; use std::path::PathBuf; +use writeable::Writeable; /// A data provider that reads ICU4X data from a filesystem directory. /// @@ -25,7 +21,7 @@ use std::path::PathBuf; /// ``` #[derive(Debug, PartialEq)] pub struct FsDataProvider { - res_root: PathBuf, + root: PathBuf, manifest: Manifest, } @@ -40,17 +36,11 @@ impl FsDataProvider { /// let provider = FsDataProvider::try_new("/path/to/data/directory") /// .expect_err("Specify a real directory in the line above"); /// ``` - pub fn try_new>(root: T) -> Result { - let root_path_buf: PathBuf = root.into(); - let manifest_path = root_path_buf.join(MANIFEST_FILE); - let manifest_str = fs::read_to_string(&manifest_path).map_err(|e| (e, &manifest_path))?; - let manifest: Manifest = serde_json_core::from_str(&manifest_str) - .map(|(obj, _)| obj) - .map_err(|e| (e, &manifest_path))?; - check_format_supported(manifest.buffer_format)?; + pub fn try_new>(root: T) -> Result { + let root = root.into(); Ok(Self { - res_root: root_path_buf, - manifest, + manifest: Manifest::parse(&root)?, + root, }) } } @@ -61,17 +51,17 @@ impl BufferProvider for FsDataProvider { key: ResourceKey, req: &DataRequest, ) -> Result, DataError> { - let mut path_buf = self.res_root.clone(); - path_buf.push(&*key.write_to_string()); + let mut path_buf = self.root.join(&*key.write_to_string()); if !path_buf.exists() { return Err(DataErrorKind::MissingResourceKey.with_req(key, req)); } path_buf.push(&*req.options.write_to_string()); - path_buf.set_extension(self.manifest.get_file_extension()); + path_buf.set_extension(self.manifest.file_extension); if !path_buf.exists() { return Err(DataErrorKind::MissingResourceOptions.with_req(key, req)); } - let buffer = fs::read(&path_buf).map_err(|e| DataError::from(e).with_path(&path_buf))?; + let buffer = + fs::read(&path_buf).map_err(|e| DataError::from(e).with_path_context(&path_buf))?; let mut metadata = 
DataResponseMetadata::default(); // TODO(#1109): Set metadata.data_langid correctly. metadata.buffer_format = Some(self.manifest.buffer_format); diff --git a/provider/fs/src/lib.rs b/provider/fs/src/lib.rs index 67fb32acdf5..162da374ae7 100644 --- a/provider/fs/src/lib.rs +++ b/provider/fs/src/lib.rs @@ -102,12 +102,10 @@ ) )] -mod error; mod fs_data_provider; -pub mod manifest; +mod manifest; #[cfg(feature = "export")] pub mod export; -pub use error::Error as FsDataError; pub use fs_data_provider::FsDataProvider; diff --git a/provider/fs/src/manifest.rs b/provider/fs/src/manifest.rs index 7b54d077c21..21543a98252 100644 --- a/provider/fs/src/manifest.rs +++ b/provider/fs/src/manifest.rs @@ -3,29 +3,78 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use icu_provider::buf::BufferFormat; +use icu_provider::prelude::*; +use std::fs; +use std::path::Path; -/// File name of the manifest. The manifest always uses JSON, even if the serializer isn't JSON. -pub const MANIFEST_FILE: &str = "manifest.json"; - -#[non_exhaustive] -#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)] -pub(crate) struct Manifest { +#[derive(Clone, Debug, PartialEq)] +pub struct Manifest { /// Which data serialization file format is used. + pub buffer_format: BufferFormat, + /// The file extension associated with the given buffer format in the manifest. + pub file_extension: &'static str, +} + +#[derive(serde::Serialize, serde::Deserialize)] +struct JsonManifest { #[serde(rename = "syntax")] pub buffer_format: BufferFormat, } impl Manifest { - /// Gets the file extension associated with the given buffer format in the manifest. - pub(crate) fn get_file_extension(&self) -> &str { - match self.buffer_format { - BufferFormat::Json => "json", - BufferFormat::Bincode1 => "bincode", - BufferFormat::Postcard07 => "postcard", - // BufferFormat is non_exhaustive, so we need a catchall case. - // This case could be triggered if a new buffer format is added to the core library - // before it gets added to FsDataProvider. - _ => "und", - } + const NAME: &'static str = "manifest.json"; + + pub(crate) fn for_format(buffer_format: BufferFormat) -> Result { + buffer_format.check_available()?; + Ok(Self { + buffer_format, + file_extension: match buffer_format { + BufferFormat::Json => "json", + BufferFormat::Bincode1 => "bincode", + BufferFormat::Postcard07 => "postcard", + // BufferFormat is non_exhaustive, so we need a catchall case. + // This case could be triggered if a new buffer format is added to the core library + // before it gets added to FsDataProvider. + bf => { + return Err(DataErrorKind::UnavailableBufferFormat(bf) + .with_str_context("Format not supported by FsDataProvider")) + } + }, + }) + } + + pub fn parse(root: &Path) -> Result { + let path = root.join(Self::NAME); + let result: JsonManifest = serde_json_core::from_str( + &fs::read_to_string(&path).map_err(|e| DataError::from(e).with_path_context(&path))?, + ) + .map_err(|e| { + DataError::custom("FsDataProvider manifest deserialization") + .with_path_context(&path) + .with_display_context(&e) + })? 
+ .0; + Self::for_format(result.buffer_format) + } + + #[cfg(feature = "export")] + pub fn write(&self, root: &Path) -> Result<(), DataError> { + let path = root.join(Self::NAME); + let mut file = crlify::BufWriterWithLineEndingFix::new( + fs::File::create(&path).map_err(|e| DataError::from(e).with_path_context(&path))?, + ); + serde::Serialize::serialize( + &JsonManifest { + buffer_format: self.buffer_format, + }, + &mut serde_json::Serializer::pretty(&mut file), + ) + .map_err(|e| { + DataError::custom("FsDataProvider manifest serialization") + .with_path_context(&path) + .with_display_context(&e) + })?; + use std::io::Write; + writeln!(&mut file).map_err(|e| DataError::from(e).with_path_context(&path)) } }
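An illustrative aside, not part of the patch: FsDataProvider::try_new now parses and validates manifest.json up front through Manifest::parse, so an unusable data directory is reported as a DataError with the offending path attached rather than through the removed FsDataError type. A minimal usage sketch; the directory path is a placeholder:

    use icu_provider::prelude::*;
    use icu_provider_fs::FsDataProvider;

    fn open_provider() -> Result<FsDataProvider, DataError> {
        // Fails fast if the directory lacks a readable, supported manifest.json.
        FsDataProvider::try_new("/path/to/exported/data")
    }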
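A further illustrative aside, not part of the patch: Manifest::write and Manifest::parse now serialize and deserialize the same JsonManifest shape, so a crate-internal round trip is a natural sanity check. A rough test sketch, assuming the export feature and an icu_provider configuration in which check_available accepts BufferFormat::Json; the module name and temp-directory handling are simplified placeholders:

    #[cfg(all(test, feature = "export"))]
    mod manifest_roundtrip_sketch {
        use super::*;
        use icu_provider::buf::BufferFormat;

        #[test]
        fn json_manifest_roundtrip() {
            // Write a manifest into a scratch directory, then read it back.
            let dir = std::env::temp_dir().join("icu4x_fs_manifest_roundtrip");
            std::fs::create_dir_all(&dir).expect("create temp dir");
            let manifest = Manifest::for_format(BufferFormat::Json).expect("format available");
            manifest.write(&dir).expect("write manifest.json");
            assert_eq!(Manifest::parse(&dir).expect("parse manifest.json"), manifest);
        }
    }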