Skip to content

Commit

Permalink
Add "HumanBinaryData" as an alternative to "Base64UrlSafeData" (kanid…
Browse files Browse the repository at this point in the history
…m#354)

* Add "HumanBinaryData" as alternative to "Base64UrlSafeData" (kanidm#352)

* Add bytes support to Base64UrlSafeData, and copy across the tests

* Rework the docs

* dedupe functionality into macros, make tests consistent

* more tests and conversions

* move Borrow impl into common

* add some more vec features

* fix clippy
micolous authored and kikuomax committed Nov 4, 2024
1 parent 6e83f0d commit f92c115
Showing 5 changed files with 560 additions and 100 deletions.
4 changes: 4 additions & 0 deletions base64urlsafedata/Cargo.toml
Original file line number Diff line number Diff line change
@@ -16,4 +16,8 @@ readme = "README.md"
[dependencies]
serde.workspace = true
base64.workspace = true
paste = "1.0.14"

[dev-dependencies]
serde_cbor_2.workspace = true
serde_json.workspace = true
176 changes: 176 additions & 0 deletions base64urlsafedata/src/common.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/// Macro to declare common functionality for [`Base64UrlSafeData`][0] and
/// [`HumanBinaryData`][1].
///
/// `$type` must be a tuple struct whose only field is a `Vec<u8>`. For that
/// type the macro generates:
///
/// * `new()`, `with_capacity()` and a [`Default`] impl (an empty buffer);
/// * `Deref`/`DerefMut` to `Vec<u8>`, plus `Borrow<[u8]>` and `AsRef<[u8]>`;
/// * `From` conversions from `Vec<u8>`, byte slices and byte arrays (owned or
///   borrowed), and from `$type` back into `Vec<u8>`;
/// * symmetric `PartialEq` impls against `Vec<u8>`, `[u8]`, `&[u8]` and
///   `[u8; N]`;
/// * a hidden `<Type>Visitor` and a `serde::Deserialize` impl which accepts a
///   Base64 string, a bytes value, or a sequence of integers.
///
/// # Requirements at the invocation site
///
/// The expansion is not self-contained. The invoking module must have
/// `std::fmt` (as `fmt`), the `paste!` macro and the `base64::Engine` trait in
/// scope, and the crate root must define `ALLOWED_DECODING_FORMATS`.
///
/// [0]: crate::Base64UrlSafeData
/// [1]: crate::HumanBinaryData
macro_rules! common_impls {
    ($type:ty) => {
        impl $type {
            /// Creates a new, empty value.
            pub const fn new() -> Self {
                Self(Vec::new())
            }

            /// Creates a new, empty value with room for at least `capacity`
            /// bytes.
            pub fn with_capacity(capacity: usize) -> Self {
                Self(Vec::with_capacity(capacity))
            }
        }

        // `new()` takes no arguments, so provide the matching `Default`.
        impl Default for $type {
            fn default() -> Self {
                Self::new()
            }
        }

        impl std::ops::Deref for $type {
            type Target = Vec<u8>;

            fn deref(&self) -> &Self::Target {
                &self.0
            }
        }

        impl std::ops::DerefMut for $type {
            fn deref_mut(&mut self) -> &mut Self::Target {
                &mut self.0
            }
        }

        impl std::borrow::Borrow<[u8]> for $type {
            fn borrow(&self) -> &[u8] {
                self.0.as_slice()
            }
        }

        impl From<Vec<u8>> for $type {
            fn from(value: Vec<u8>) -> Self {
                Self(value)
            }
        }

        impl<const N: usize> From<[u8; N]> for $type {
            fn from(value: [u8; N]) -> Self {
                Self(value.to_vec())
            }
        }

        impl From<&[u8]> for $type {
            fn from(value: &[u8]) -> Self {
                Self(value.to_vec())
            }
        }

        impl<const N: usize> From<&[u8; N]> for $type {
            fn from(value: &[u8; N]) -> Self {
                Self(value.to_vec())
            }
        }

        impl From<$type> for Vec<u8> {
            fn from(value: $type) -> Self {
                value.0
            }
        }

        impl AsRef<[u8]> for $type {
            fn as_ref(&self) -> &[u8] {
                &self.0
            }
        }

        // Helper: implements `PartialEq` in both directions between `$type`
        // and one byte-container type.
        macro_rules! partial_eq_impl {
            ($other:ty) => {
                impl PartialEq<$other> for $type {
                    fn eq(&self, other: &$other) -> bool {
                        self.as_slice() == &other[..]
                    }
                }

                impl PartialEq<$type> for $other {
                    fn eq(&self, other: &$type) -> bool {
                        self.eq(&other.0)
                    }
                }
            };
        }

        partial_eq_impl!(Vec<u8>);
        partial_eq_impl!([u8]);
        partial_eq_impl!(&[u8]);

        // Arrays need a const-generic length, so they are written out by hand
        // rather than going through `partial_eq_impl!`.
        impl<const N: usize> PartialEq<[u8; N]> for $type {
            fn eq(&self, other: &[u8; N]) -> bool {
                self.0.eq(other)
            }
        }

        impl<const N: usize> PartialEq<$type> for [u8; N] {
            fn eq(&self, other: &$type) -> bool {
                self.as_slice().eq(&other.0)
            }
        }

        paste! {
            #[doc(hidden)]
            struct [<$type Visitor>];

            impl<'de> serde::de::Visitor<'de> for [<$type Visitor>] {
                type Value = $type;

                fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                    write!(
                        formatter,
                        "a url-safe base64-encoded string, bytes, or sequence of integers"
                    )
                }

                /// Decodes a Base64 string, trying each engine in
                /// `ALLOWED_DECODING_FORMATS` in order; the first success wins.
                fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
                where
                    E: serde::de::Error,
                {
                    // Forgive alt base64 decoding formats
                    for config in crate::ALLOWED_DECODING_FORMATS {
                        if let Ok(data) = config.decode(v) {
                            return Ok(<$type>::from(data));
                        }
                    }

                    Err(serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &self))
                }

                /// Collects a sequence of `u8` elements; any element that is
                /// not a valid `u8` fails the whole sequence.
                fn visit_seq<A>(self, mut v: A) -> Result<Self::Value, A::Error>
                where
                    A: serde::de::SeqAccess<'de>,
                {
                    // The hint may come straight from a length prefix in the
                    // input (e.g. a CBOR array header), so never trust it for
                    // more than a modest up-front allocation. The vector still
                    // grows as needed while elements are actually read.
                    const MAX_PREALLOC: usize = 4096;
                    let mut data: Vec<u8> =
                        Vec::with_capacity(v.size_hint().map_or(0, |sz| sz.min(MAX_PREALLOC)));

                    while let Some(i) = v.next_element::<u8>()? {
                        data.push(i);
                    }
                    Ok(<$type>::from(data))
                }

                /// Takes ownership of an owned byte buffer without copying.
                fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
                where
                    E: serde::de::Error,
                {
                    Ok(<$type>::from(v))
                }

                /// Copies a borrowed byte slice.
                fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
                where
                    E: serde::de::Error,
                {
                    Ok(<$type>::from(v))
                }
            }

            impl<'de> serde::Deserialize<'de> for $type {
                fn deserialize<D>(deserializer: D) -> Result<Self, <D as serde::Deserializer<'de>>::Error>
                where
                    D: serde::Deserializer<'de>,
                {
                    // Was previously `deserialize_str`. `deserialize_any` lets
                    // the format dispatch to whichever `visit_*` method matches
                    // the data, which is why a self-describing format is
                    // required.
                    deserializer.deserialize_any([<$type Visitor>])
                }
            }
        }
    };
}
49 changes: 49 additions & 0 deletions base64urlsafedata/src/human.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use std::fmt;

use crate::{Base64UrlSafeData, URL_SAFE_NO_PAD};
use base64::Engine;
use serde::{Serialize, Serializer};

/// Serde wrapper for `Vec<u8>` which emits URL-safe, non-padded Base64 for
/// *only* human-readable formats, and accepts Base64 and binary formats.
///
/// * Deserialisation is described in the [module documentation][crate].
///
/// * Serialisation to [a human-readable format][0] (such as JSON) emits
///   URL-safe, non-padded Base64 (per [RFC 4648 §5][sec5]).
///
/// * Serialisation to [a non-human-readable format][0] (such as CBOR) emits
///   a native "bytes" type, and does not Base64-encode the value.
///
/// [0]: https://docs.rs/serde/latest/serde/trait.Serializer.html#method.is_human_readable
/// [sec5]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct HumanBinaryData(Vec<u8>);

// Generates the constructors, `Deref`/`From`/`PartialEq` conversions and the
// `Deserialize` impl shared with `Base64UrlSafeData`.
common_impls!(HumanBinaryData);

/// Converts a [`Base64UrlSafeData`] into a [`HumanBinaryData`], reusing the
/// underlying byte buffer.
impl From<Base64UrlSafeData> for HumanBinaryData {
    fn from(value: Base64UrlSafeData) -> Self {
        let bytes: Vec<u8> = Vec::from(value);
        Self(bytes)
    }
}

/// Two wrappers are equal when they hold the same bytes, regardless of which
/// wrapper type each one is.
impl PartialEq<Base64UrlSafeData> for HumanBinaryData {
    fn eq(&self, other: &Base64UrlSafeData) -> bool {
        let ours: &[u8] = self.0.as_slice();
        let theirs: &[u8] = other.as_slice();
        ours == theirs
    }
}

impl Serialize for HumanBinaryData {
    /// Serialises as a native bytes value for non-human-readable formats, and
    /// as a URL-safe, non-padded Base64 string otherwise.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Binary formats get the raw bytes untouched.
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.0.as_slice());
        }

        let text = URL_SAFE_NO_PAD.encode(self.0.as_slice());
        serializer.serialize_str(text.as_str())
    }
}
212 changes: 112 additions & 100 deletions base64urlsafedata/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,76 @@
//! Base64 data that encodes to Base64 UrlSafe, but can decode from multiple
//! base64 implementations to account for various clients and libraries. Compatible
//! with serde.
//! Wrappers for `Vec<u8>` to make Serde serialise and deserialise as URL-safe,
//! non-padded Base64 (per [RFC 4648 §5][sec5]).
//!
//! ## Serialisation behaviour
//!
//! * [`Base64UrlSafeData`] always serialises to URL-safe, non-padded Base64.
//!
//! * [`HumanBinaryData`] only serialises to URL-safe, non-padded Base64 when
//! using a [human-readable format][0].
//!
//! Otherwise, it serialises as a "bytes"-like type (like [`serde_bytes`][1]).
//!
//! This feature is new in `base64urlsafedata` v0.1.4.
//!
//! By comparison, Serde's default behaviour is to serialise `Vec<u8>` as a
//! sequence of integers. This is a problem for many formats:
//!
//! * `serde_cbor` encodes as an `array`, rather than a `bytes`. Each element is
//! a CBOR unsigned integer, which needs a second byte for values > `0x17`;
//! this averages about 1.91 bytes per byte assuming an equal distribution of
//! values.
//!
//! * `serde_json` encodes as an `Array<Number>`, which averages 3.55 bytes per
//! byte without whitespace.
//!
//! Using Base64 encoding averages 1.33 bytes per byte, and most formats pass
//! strings nearly-verbatim.
//!
//! ## Deserialisation behaviour
//!
//! Both types will deserialise multiple formats, provided the format is
//! self-describing (i.e. [implements `deserialize_any`][5]):
//!
//! * Bytes types are passed as-is (new in v0.1.4).
//!
//! [`HumanBinaryData`] produces this for [non-human-readable formats][0].
//!
//! * Sequences of integers are passed as-is.
//!
//! Serde's default `Vec<u8>` serialiser produces this for many formats.
//!
//! * Strings are decoded as Base64 per [RFC 4648 §5 (URL-safe)][sec5] or
//! [§4 (standard)][sec4], with optional padding.
//!
//! [`Base64UrlSafeData`] produces this for all formats, and
//! [`HumanBinaryData`] produces this for [human-readable formats][0]. This
//! should also be compatible with many other serialisers.
//!
//! ## Migrating from `Base64UrlSafeData` to `HumanBinaryData`
//!
//! [`Base64UrlSafeData`] always uses Base64 encoding, which isn't optimal for
//! many binary formats. For that reason, it's a good idea to migrate to
//! [`HumanBinaryData`] if you're using a binary format.
//!
//! However, you'll need to make sure *all* readers using [`Base64UrlSafeData`]
//! are on `base64urlsafedata` v0.1.4 or later before switching *anything* to
//! [`HumanBinaryData`]. Otherwise, they'll not be able to read any data in the
//! new format!
//!
//! Once they're all migrated across, you can start issuing writes in the new
//! format. It's a good idea to slowly roll out the change, in case you discover
//! something has been left behind.
//!
//! ## Alternatives
//!
//! * [`serde_bytes`][1], which implements efficient coding of `Vec<u8>`
//! [for non-human-readable formats only][2].
//!
//! [0]: https://docs.rs/serde/latest/serde/trait.Serializer.html#method.is_human_readable
//! [1]: https://docs.rs/serde_bytes
//! [2]: https://github.com/serde-rs/bytes/issues/37
//! [5]: https://serde.rs/impl-deserialize.html
//! [sec4]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
//! [sec5]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
#![deny(warnings)]
#![warn(unused_extern_crates)]
#![deny(clippy::todo)]
@@ -14,56 +83,68 @@
#![deny(clippy::needless_pass_by_value)]
#![deny(clippy::trivially_copy_pass_by_ref)]

#[macro_use]
extern crate paste;

#[macro_use]
mod common;
mod human;
#[cfg(test)]
mod tests;

pub use crate::human::HumanBinaryData;

use base64::{
engine::general_purpose::{
GeneralPurpose, STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD,
},
Engine,
};
use serde::de::{Error, SeqAccess, Unexpected, Visitor};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::borrow::Borrow;
use serde::{Serialize, Serializer};
use std::convert::TryFrom;
use std::fmt;
use std::hash::Hash;

static ALLOWED_DECODING_FORMATS: &[GeneralPurpose] =
&[URL_SAFE_NO_PAD, URL_SAFE, STANDARD, STANDARD_NO_PAD];

/// Serde wrapper for `Vec<u8>` which always emits URL-safe, non-padded Base64,
/// and accepts Base64 and binary formats.
///
/// * Deserialisation is described in the [module documentation][crate].
///
/// * Serialisation *always* emits URL-safe, non-padded Base64 (per
/// [RFC 4648 §5][sec5]).
///
/// Unlike [`HumanBinaryData`], this happens *regardless* of whether the
/// underlying serialisation format is [human readable][0]. If you're
/// serialising to [non-human-readable formats][0], you should consider
/// [migrating to `HumanBinaryData`][crate].
///
/// Otherwise, this type should work as much like a `Vec<u8>` as possible.
///
/// [0]: https://docs.rs/serde/latest/serde/trait.Serializer.html#method.is_human_readable
/// [sec5]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
/// A container for binary that should be base64 encoded in serialisation. In reverse
/// when deserializing, will decode from many different types of base64 possible.
pub struct Base64UrlSafeData(pub Vec<u8>);

impl fmt::Display for Base64UrlSafeData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", URL_SAFE_NO_PAD.encode(self))
}
}

impl Borrow<[u8]> for Base64UrlSafeData {
fn borrow(&self) -> &[u8] {
self.0.as_slice()
}
}
common_impls!(Base64UrlSafeData);

impl From<Vec<u8>> for Base64UrlSafeData {
fn from(v: Vec<u8>) -> Base64UrlSafeData {
Base64UrlSafeData(v)
impl From<HumanBinaryData> for Base64UrlSafeData {
fn from(value: HumanBinaryData) -> Self {
Self(value.into())
}
}

// We have to allow this because we can't implement a trait on an external type
#[allow(clippy::from_over_into)]
impl Into<Vec<u8>> for Base64UrlSafeData {
fn into(self) -> Vec<u8> {
self.0
impl PartialEq<HumanBinaryData> for Base64UrlSafeData {
fn eq(&self, other: &HumanBinaryData) -> bool {
self.0.eq(other)
}
}

impl AsRef<[u8]> for Base64UrlSafeData {
fn as_ref(&self) -> &[u8] {
&self.0
impl fmt::Display for Base64UrlSafeData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", URL_SAFE_NO_PAD.encode(self))
}
}

@@ -80,56 +161,6 @@ impl TryFrom<&str> for Base64UrlSafeData {
}
}

struct Base64UrlSafeDataVisitor;

impl<'de> Visitor<'de> for Base64UrlSafeDataVisitor {
type Value = Base64UrlSafeData;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a base64 url encoded string")
}

fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: Error,
{
// Forgive alt base64 decoding formats
for config in ALLOWED_DECODING_FORMATS {
if let Ok(data) = config.decode(v) {
return Ok(Base64UrlSafeData(data));
}
}

Err(serde::de::Error::invalid_value(Unexpected::Str(v), &self))
}

fn visit_seq<A>(self, mut v: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut data = if let Some(sz) = v.size_hint() {
Vec::with_capacity(sz)
} else {
Vec::new()
};

while let Some(i) = v.next_element()? {
data.push(i)
}
Ok(Base64UrlSafeData(data))
}
}

impl<'de> Deserialize<'de> for Base64UrlSafeData {
fn deserialize<D>(deserializer: D) -> Result<Self, <D as Deserializer<'de>>::Error>
where
D: Deserializer<'de>,
{
// Was previously _str
deserializer.deserialize_any(Base64UrlSafeDataVisitor)
}
}

impl Serialize for Base64UrlSafeData {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -139,22 +170,3 @@ impl Serialize for Base64UrlSafeData {
serializer.serialize_str(&encoded)
}
}

#[cfg(test)]
mod tests {
use crate::Base64UrlSafeData;
use std::convert::TryFrom;

#[test]
fn test_try_from() {
assert!(Base64UrlSafeData::try_from("aGVsbG8=").is_ok());
assert!(Base64UrlSafeData::try_from("abcdefghij").is_err());
}

#[test]
fn test_try_from_json() {
// let _: Base64UrlSafeData = serde_json::from_str("\"aGVsbG8=\"")
// .expect("Invalid Data");
assert!(serde_json::from_str::<Base64UrlSafeData>("[0,1,2,3]").is_ok());
}
}
219 changes: 219 additions & 0 deletions base64urlsafedata/src/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
use super::*;
use std::convert::TryFrom;

/// `TryFrom<&str>` accepts a padded Base64 string and rejects a malformed one.
#[test]
fn test_try_from() {
    let accepted = Base64UrlSafeData::try_from("aGVsbG8=");
    assert!(accepted.is_ok());

    let rejected = Base64UrlSafeData::try_from("abcdefghij");
    assert!(rejected.is_err());
}

// Generates one `#[test]` per `name: (json, expected_bytes),` entry, checking
// that both wrapper types deserialise the JSON text to the expected bytes.
macro_rules! from_json_test {
    ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (json, want): (&str, &[u8]) = $value;

                let always_b64: Base64UrlSafeData = serde_json::from_str(json).unwrap();
                assert_eq!(always_b64, want);

                let human: HumanBinaryData = serde_json::from_str(json).unwrap();
                assert_eq!(human, want);
            }
        )*
    };
}

// Generates one `#[test]` per `name: json,` entry, checking that both wrapper
// types refuse to deserialise the JSON text.
macro_rules! from_invalid_json_test {
    ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let json: &str = $value;

                let always_b64: Result<Base64UrlSafeData, _> = serde_json::from_str(json);
                assert!(always_b64.is_err());

                let human: Result<HumanBinaryData, _> = serde_json::from_str(json);
                assert!(human.is_err());
            }
        )*
    };
}

// Generates one `#[test]` per `name: (cbor, expected_bytes),` entry, checking
// that both wrapper types deserialise the CBOR document to the expected bytes.
macro_rules! from_cbor_test {
    ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (cbor, want): (&[u8], &[u8]) = $value;

                let always_b64: Base64UrlSafeData = serde_cbor_2::from_slice(cbor).unwrap();
                assert_eq!(always_b64, want);

                let human: HumanBinaryData = serde_cbor_2::from_slice(cbor).unwrap();
                assert_eq!(human, want);
            }
        )*
    };
}

// Generates one `#[test]` per `name: cbor,` entry, checking that both wrapper
// types refuse to deserialise the CBOR document.
macro_rules! from_invalid_cbor_test {
    ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let cbor: &[u8] = $value;

                let always_b64: Result<Base64UrlSafeData, _> = serde_cbor_2::from_slice(cbor);
                assert!(always_b64.is_err());

                let human: Result<HumanBinaryData, _> = serde_cbor_2::from_slice(cbor);
                assert!(human.is_err());
            }
        )*
    };
}

// Valid JSON inputs. Each entry is `(JSON text, expected decoded bytes)` and
// is checked against both `Base64UrlSafeData` and `HumanBinaryData`.
from_json_test! {
// Arrays of numbers are taken as the raw bytes.
from_json_empty_array: ("[]", &[]),
from_json_as_array_number: ("[0,1,2,255]", &[0x00, 0x01, 0x02, 0xFF]),
from_json_as_array_number_whitespace: ("[0, 1, 2, 255]", &[0x00, 0x01, 0x02, 0xFF]),
// Strings are Base64: URL-safe or standard alphabet, with or without padding.
from_json_empty_string: ("\"\"", &[]),
from_json_b64_urlsafe_nonpadded: ("\"AAEC_w\"", &[0x00, 0x01, 0x02, 0xFF]),
from_json_b64_urlsafe_padded: ("\"AAEC_w==\"", &[0x00, 0x01, 0x02, 0xFF]),
from_json_b64_standard_nonpadded: ("\"AAEC/w\"", &[0x00, 0x01, 0x02, 0xFF]),
from_json_b64_standard_padded: ("\"AAEC/w==\"", &[0x00, 0x01, 0x02, 0xFF]),
}

// JSON inputs that both wrapper types must reject: an empty document, `null`,
// a bare number, and maps (even when a map value is valid Base64).
from_invalid_json_test! {
from_json_empty: "",
from_json_null: "null",
from_json_number: "1",
from_json_empty_map: "{}",
from_json_map: "{\"1\": \"AAEC_w\"}",
}

// Valid CBOR inputs. Each entry is `(CBOR document, expected decoded bytes)`
// and is checked against both `Base64UrlSafeData` and `HumanBinaryData`.
// Covers the three accepted shapes: a byte string, an array of integers, and
// a text string holding Base64 in any of the four accepted variants.
from_cbor_test! {
from_cbor_bytes: (&[
0x44, // bytes(4)
0x00, 0x01, 0x02, 0xFF,
], &[0x00, 0x01, 0x02, 0xFF]),
from_cbor_array: (&[
0x84, // array(4)
0x00, // 0
0x01, // 1
0x02, // 2
0x18, 0xff, // 0xff
], &[0x00, 0x01, 0x02, 0xFF]),
from_cbor_empty_array: (&[0x80], &[]), // array(0)
from_cbor_empty_string: (&[0x60], &[]), // text(0)
from_cbor_string_b64_urlsafe_nonpadded: (&[
0x66, // text(6)
0x41, 0x41, 0x45, 0x43, 0x5F, 0x77, // "AAEC_w"
], &[0x00, 0x01, 0x02, 0xFF]),
from_cbor_string_b64_urlsafe_padded: (&[
0x68, // text(8)
0x41, 0x41, 0x45, 0x43, 0x5F, 0x77, 0x3D, 0x3D // "AAEC_w=="
], &[0x00, 0x01, 0x02, 0xFF]),
from_cbor_string_b64_standard_nonpadded: (&[
0x66, // text(6)
0x41, 0x41, 0x45, 0x43, 0x2F, 0x77, // "AAEC/w"
], &[0x00, 0x01, 0x02, 0xFF]),
from_cbor_string_b64_standard_padded: (&[
0x68, // text(8)
0x41, 0x41, 0x45, 0x43, 0x2F, 0x77, 0x3D, 0x3D // "AAEC/w=="
], &[0x00, 0x01, 0x02, 0xFF]),
}

// CBOR inputs that both wrapper types must reject.
from_invalid_cbor_test! {
// array(2) of text(1) "a", text(1) "b": elements are not integers
from_seq_string: &[0x82, 0x61, 0x61, 0x61, 0x62],
// no input at all
from_empty: &[],
// bare unsigned integer 1
from_positive_int: &[0x01],
// bare negative integer -1
from_negative_int: &[0x20],
// array(2) of -1, -2: negative values do not fit in a byte
from_seq_negative_int: &[0x82, 0x20, 0x21],
// array(2) of 1, -1: one bad element fails the whole sequence
from_seq_positive_and_negative_int: &[0x82, 0x01, 0x20],
}

/// JSON is human-readable, so both wrapper types must emit the same URL-safe,
/// non-padded Base64 string.
#[test]
fn to_json() {
    let raw: [u8; 4] = [0x00, 0x01, 0x02, 0xff];

    // JSON output should always be a base64 string
    let always_b64 = serde_json::to_string(&Base64UrlSafeData::from(raw)).unwrap();
    assert_eq!(always_b64, "\"AAEC_w\"");

    let human = serde_json::to_string(&HumanBinaryData::from(raw)).unwrap();
    assert_eq!(human, "\"AAEC_w\"");
}

/// CBOR is not human-readable, so the two wrapper types must differ:
/// `Base64UrlSafeData` still emits Base64 text, `HumanBinaryData` emits bytes.
#[test]
fn to_cbor() {
    let raw: [u8; 4] = [0x00, 0x01, 0x02, 0xff];

    // Base64UrlSafeData CBOR output should be a base64 encoded string
    let always_b64 = serde_cbor_2::to_vec(&Base64UrlSafeData::from(raw)).unwrap();
    let want_text: Vec<u8> = vec![
        0x66, // text(6)
        0x41, 0x41, 0x45, 0x43, 0x5F, 0x77, // "AAEC_w"
    ];
    assert_eq!(always_b64, want_text);

    // HumanBinaryData CBOR output should be a bytes
    let human = serde_cbor_2::to_vec(&HumanBinaryData::from(raw)).unwrap();
    let want_bytes: Vec<u8> = vec![
        0x44, // bytes(4)
        0x00, 0x01, 0x02, 0xff,
    ];
    assert_eq!(human, want_bytes);
}

/// Converting between the two wrapper types in either direction keeps the
/// bytes intact.
#[test]
fn interop_from() {
    let raw: [u8; 4] = [0x00, 0x01, 0x02, 0xff];
    let b64 = Base64UrlSafeData::from(&raw[..]);
    let human = HumanBinaryData::from(&raw[..]);

    let b64_from_human: Base64UrlSafeData = human.clone().into();
    assert_eq!(b64, b64_from_human);

    let human_from_b64: HumanBinaryData = b64.into();
    assert_eq!(human, human_from_b64);
}

/// Equality works in both directions between the two wrapper types and
/// between each wrapper type and a plain byte array.
#[test]
fn interop_equality() {
    let input: [u8; 4] = [0x00, 0x01, 0x02, 0xff];
    let other: [u8; 4] = [0xff, 0x00, 0x01, 0x02];

    let b64_input = Base64UrlSafeData::from(&input[..]);
    let human_input = HumanBinaryData::from(&input[..]);
    let b64_other = Base64UrlSafeData::from(&other[..]);
    let human_other = HumanBinaryData::from(&other[..]);

    // Same bytes: equal across wrapper types, in both operand orders.
    assert_eq!(b64_input, human_input);
    assert_eq!(human_input, b64_input);

    // Same bytes: equal to the plain array, in both operand orders.
    assert_eq!(input, b64_input);
    assert_eq!(b64_input, input);
    assert_eq!(input, human_input);
    assert_eq!(human_input, input);

    // Different bytes: unequal across wrapper types.
    assert_ne!(b64_input, human_other);
    assert_ne!(human_input, b64_other);

    // Different bytes: unequal to the plain array.
    assert_ne!(input, b64_other);
    assert_ne!(b64_other, input);
    assert_ne!(input, human_other);
    assert_ne!(human_other, input);
}

/// Both wrapper types expose `Vec<u8>` methods through `Deref`/`DerefMut` and
/// compare equal to a plain `Vec<u8>`.
#[test]
fn interop_vec() {
    let want: Vec<u8> = vec![0, 1, 2, 3, 4];

    let mut b64 = Base64UrlSafeData::from([0u8, 1, 2, 3]);
    b64.push(4);
    assert_eq!(want, b64);
    assert_eq!(5, b64.len());

    let mut human = HumanBinaryData::from([0u8, 1, 2, 3]);
    human.push(4);
    assert_eq!(want, human);
    assert_eq!(5, human.len());
}

0 comments on commit f92c115

Please sign in to comment.