From 2afab79d0e54c82f3e61670ae3f5f3ed4b765a6b Mon Sep 17 00:00:00 2001
From: Michael Farrell
Date: Fri, 29 Sep 2023 12:26:15 +1000
Subject: [PATCH] Add "HumanBinaryData" as alternative to "Base64UrlSafeData" (#352)

---
 base64urlsafedata/Cargo.toml   |   3 +
 base64urlsafedata/src/human.rs | 325 +++++++++++++++++++++++++++++++++
 base64urlsafedata/src/lib.rs   |   4 +
 3 files changed, 332 insertions(+)
 create mode 100644 base64urlsafedata/src/human.rs

diff --git a/base64urlsafedata/Cargo.toml b/base64urlsafedata/Cargo.toml
index 72f512f1..053813cb 100644
--- a/base64urlsafedata/Cargo.toml
+++ b/base64urlsafedata/Cargo.toml
@@ -16,4 +16,7 @@ readme = "README.md"
 [dependencies]
 serde.workspace = true
 base64.workspace = true
+
+[dev-dependencies]
+serde_cbor_2.workspace = true
 serde_json.workspace = true
diff --git a/base64urlsafedata/src/human.rs b/base64urlsafedata/src/human.rs
new file mode 100644
index 00000000..50c5c822
--- /dev/null
+++ b/base64urlsafedata/src/human.rs
@@ -0,0 +1,325 @@
+use std::{
+    fmt,
+    ops::{Deref, DerefMut},
+};
+
+use crate::{ALLOWED_DECODING_FORMATS, URL_SAFE_NO_PAD};
+use base64::Engine;
+use serde::de::{Error, SeqAccess, Unexpected, Visitor};
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+/// Wrapper for `Vec<u8>` which changes Serde's serialisation and
+/// deserialisation behaviour:
+///
+/// * on serialisation to [a "human-readable" format][0] (such as JSON), it
+///   should emit URL-safe, non-padded Base64 (per RFC 4648 §5).
+///
+/// * on serialisation to [a "non-human-readable" format][0] (such as CBOR), it
+///   should emit a native "bytes" type, and not encode the value.
+///
+/// * deserialisation accepts multiple input types:
+///
+///   * a bytes type, which is passed as-is
+///   * a sequence of integers, which is passed as-is
+///   * a string, which is decoded Base64 per RFC 4648 §5 (URL-safe) or §4
+///     (standard), with optional padding
+///
+/// Otherwise, this type should work as much like a `Vec<u8>` as possible.
+///
+/// **See also:** [`serde_bytes`][1], which implements efficient coding of
+/// `Vec<u8>` [for non-human-readable formats][2].
+///
+/// [0]: https://docs.rs/serde/latest/serde/trait.Serializer.html#method.is_human_readable
+/// [1]: https://docs.rs/serde_bytes
+/// [2]: https://github.com/serde-rs/bytes/issues/37
+#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
+pub struct HumanBinaryData(Vec<u8>);
+
+impl Deref for HumanBinaryData {
+    type Target = Vec<u8>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for HumanBinaryData {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl From<Vec<u8>> for HumanBinaryData {
+    fn from(value: Vec<u8>) -> Self {
+        Self(value)
+    }
+}
+
+impl<const N: usize> From<[u8; N]> for HumanBinaryData {
+    fn from(value: [u8; N]) -> Self {
+        Self(value.to_vec())
+    }
+}
+
+impl From<&[u8]> for HumanBinaryData {
+    fn from(value: &[u8]) -> Self {
+        Self(value.to_vec())
+    }
+}
+
+impl From<HumanBinaryData> for Vec<u8> {
+    fn from(value: HumanBinaryData) -> Self {
+        value.0
+    }
+}
+
+impl AsRef<[u8]> for HumanBinaryData {
+    fn as_ref(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+macro_rules! partial_eq_impl {
+    ($type:ty) => {
+        impl PartialEq<$type> for HumanBinaryData {
+            fn eq(&self, other: &$type) -> bool {
+                self.0.eq(other)
+            }
+        }
+
+        impl PartialEq<HumanBinaryData> for $type {
+            fn eq(&self, other: &HumanBinaryData) -> bool {
+                self.eq(&other.0)
+            }
+        }
+    };
+}
+
+partial_eq_impl!(Vec<u8>);
+partial_eq_impl!([u8]);
+
+impl<const N: usize> PartialEq<[u8; N]> for HumanBinaryData {
+    fn eq(&self, other: &[u8; N]) -> bool {
+        self.0.eq(other)
+    }
+}
+
+impl<const N: usize> PartialEq<HumanBinaryData> for [u8; N] {
+    fn eq(&self, other: &HumanBinaryData) -> bool {
+        self.as_slice().eq(&other.0)
+    }
+}
+
+struct HumanBinaryDataVisitor;
+
+impl<'de> Visitor<'de> for HumanBinaryDataVisitor {
+    type Value = HumanBinaryData;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        write!(formatter, "a url-safe base64-encoded string")
+    }
+
+    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        // Forgive alt base64 decoding formats
+        for config in ALLOWED_DECODING_FORMATS {
+            if let Ok(data) = config.decode(v) {
+                return Ok(HumanBinaryData(data));
+            }
+        }
+
+        Err(serde::de::Error::invalid_value(Unexpected::Str(v), &self))
+    }
+
+    fn visit_seq<A>(self, mut v: A) -> Result<Self::Value, A::Error>
+    where
+        A: SeqAccess<'de>,
+    {
+        let mut data = if let Some(sz) = v.size_hint() {
+            Vec::with_capacity(sz)
+        } else {
+            Vec::new()
+        };
+
+        while let Some(i) = v.next_element()? {
+            data.push(i)
+        }
+        Ok(HumanBinaryData(data))
+    }
+
+    fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        Ok(HumanBinaryData(v))
+    }
+
+    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        Ok(HumanBinaryData(v.into()))
+    }
+}
+
+impl<'de> Deserialize<'de> for HumanBinaryData {
+    fn deserialize<D>(deserializer: D) -> Result<Self, <D as Deserializer<'de>>::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        // Was previously _str
+        deserializer.deserialize_any(HumanBinaryDataVisitor)
+    }
+}
+
+impl Serialize for HumanBinaryData {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if serializer.is_human_readable() {
+            let encoded = URL_SAFE_NO_PAD.encode(self);
+            serializer.serialize_str(&encoded)
+        } else {
+            serializer.serialize_bytes(self)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn from_json() {
+        let expected = [0x00, 0x01, 0x02, 0xff];
+
+        // JSON as Array
+        assert_eq!(
+            serde_json::from_str::<HumanBinaryData>("[0,1,2,255]").unwrap(),
+            expected
+        );
+
+        // JSON as Array with whitespace
+        assert_eq!(
+            serde_json::from_str::<HumanBinaryData>("[0, 1, 2, 255]").unwrap(),
+            expected
+        );
+
+        // RFC 4648 §5 non-padded (URL-safe)
+        assert_eq!(
+            serde_json::from_str::<HumanBinaryData>("\"AAEC_w\"").unwrap(),
+            expected
+        );
+
+        // RFC 4648 §5 padded (URL-safe)
+        assert_eq!(
+            serde_json::from_str::<HumanBinaryData>("\"AAEC_w==\"").unwrap(),
+            expected
+        );
+
+        // RFC 4648 §4 non-padded (standard)
+        assert_eq!(
+            serde_json::from_str::<HumanBinaryData>("\"AAEC/w\"").unwrap(),
+            expected
+        );
+
+        // RFC 4648 §4 padded (standard)
+        assert_eq!(
+            serde_json::from_str::<HumanBinaryData>("\"AAEC/w==\"").unwrap(),
+            expected
+        );
+    }
+
+    #[test]
+    fn to_json() {
+        let input = HumanBinaryData(vec![0x00, 0x01, 0x02, 0xff]);
+
+        // JSON output should be a String, RFC 4648 §5 non-padded (URL-safe)
+        assert_eq!(serde_json::to_string(&input).unwrap(), "\"AAEC_w\"");
+    }
+
+    #[test]
+    fn from_cbor() {
+        let expected = [0x00, 0x01, 0x02, 0xff];
+
+        // Data as bytes
+        assert_eq!(
+            serde_cbor_2::from_slice::<HumanBinaryData>(&[
+                0x44, // bytes(4)
+                0x00, 0x01, 0x02, 0xff
+            ])
+            .unwrap(),
+            expected
+        );
+
+        // Data as array
+        assert_eq!(
+            serde_cbor_2::from_slice::<HumanBinaryData>(&[
+                0x84, // array(4)
+                0x00, // 0
+                0x01, // 1
+                0x02, // 2
+                0x18, 0xff // 0xff
+            ])
+            .unwrap(),
+            expected
+        );
+
+        // RFC 4648 §5 non-padded (URL-safe)
+        assert_eq!(
+            serde_cbor_2::from_slice::<HumanBinaryData>(&[
+                0x66, // text(6)
+                0x41, 0x41, 0x45, 0x43, 0x5F, 0x77, // "AAEC_w"
+            ])
+            .unwrap(),
+            expected
+        );
+
+        // RFC 4648 §5 padded (URL-safe)
+        assert_eq!(
+            serde_cbor_2::from_slice::<HumanBinaryData>(&[
+                0x68, // text(8)
+                0x41, 0x41, 0x45, 0x43, 0x5F, 0x77, 0x3D, 0x3D // "AAEC_w=="
+            ])
+            .unwrap(),
+            expected
+        );
+
+        // RFC 4648 §4 non-padded (standard)
+        assert_eq!(
+            serde_cbor_2::from_slice::<HumanBinaryData>(&[
+                0x66, // text(6)
+                0x41, 0x41, 0x45, 0x43, 0x2F, 0x77, // "AAEC/w"
+            ])
+            .unwrap(),
+            expected
+        );
+
+        // RFC 4648 §4 padded (standard)
+        assert_eq!(
+            serde_cbor_2::from_slice::<HumanBinaryData>(&[
+                0x68, // text(8)
+                0x41, 0x41, 0x45, 0x43, 0x2F, 0x77, 0x3D, 0x3D // "AAEC/w=="
+            ])
+            .unwrap(),
+            expected
+        );
+    }
+
+    #[test]
+    fn to_cbor() {
+        let input = HumanBinaryData(vec![0x00, 0x01, 0x02, 0xff]);
+
+        // CBOR output should be bytes, not Base64 encoded
+        assert_eq!(
+            serde_cbor_2::to_vec(&input).unwrap(),
+            vec![
+                0x44, // bytes(4)
+                0x00, 0x01, 0x02, 0xff
+            ]
+        );
+    }
+}
diff --git a/base64urlsafedata/src/lib.rs b/base64urlsafedata/src/lib.rs
index 21d215ef..324dae1a 100644
--- a/base64urlsafedata/src/lib.rs
+++ b/base64urlsafedata/src/lib.rs
@@ -14,6 +14,10 @@
 #![deny(clippy::needless_pass_by_value)]
 #![deny(clippy::trivially_copy_pass_by_ref)]
 
+mod human;
+
+pub use crate::human::HumanBinaryData;
+
 use base64::{
     engine::general_purpose::{
         GeneralPurpose, STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD,