diff --git a/Cargo.toml b/Cargo.toml index e60a1c1..4112826 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ license = "Apache-2.0" name = "sonic-rs" readme = "README.md" repository = "https://github.com/cloudwego/sonic-rs" -version = "0.5.5" +version = "0.5.6" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/scripts/fmt_fix.sh b/scripts/fmt_fix.sh new file mode 100755 index 0000000..816cbb5 --- /dev/null +++ b/scripts/fmt_fix.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +cd "${REPO_ROOT}" + +echo "Updating Rust toolchain..." +rustup update + +if [[ "${1:-}" == "--check" ]]; then + cargo fmt --all -- --check +else + cargo fmt --all +fi + diff --git a/src/config.rs b/src/config.rs index 802d2ad..6e1696a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,3 +3,8 @@ pub(crate) struct DeserializeCfg { pub use_rawnumber: bool, pub utf8_lossy: bool, } + +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct SerializeCfg { + pub sort_map_keys: bool, +} diff --git a/src/format.rs b/src/format.rs index ccdb792..539c5c7 100644 --- a/src/format.rs +++ b/src/format.rs @@ -8,7 +8,7 @@ use crate::{serde::tri, util::string::format_string, writer::WriteExt}; /// This trait abstracts away serializing the JSON control characters, which allows the user to /// optionally pretty print the JSON output. -pub trait Formatter { +pub trait Formatter: Clone { /// Writes a `null` value to the specified writer. #[inline] fn write_null(&mut self, writer: &mut W) -> io::Result<()> diff --git a/src/serde/mod.rs b/src/serde/mod.rs index 8ec8a00..9479b6d 100644 --- a/src/serde/mod.rs +++ b/src/serde/mod.rs @@ -30,7 +30,7 @@ mod test { use bytes::Bytes; use faststr::FastStr; - use serde::{de::IgnoredAny, Deserialize, Serialize}; + use serde::{de::IgnoredAny, ser::SerializeMap, Deserialize, Serialize}; use super::*; use crate::{Result, Value}; @@ -50,6 +50,84 @@ mod test { }; } + struct UnorderedMap<'a> { + entries: &'a [(&'a str, u8)], + } + + impl Serialize for UnorderedMap<'_> { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + let mut map = serializer.serialize_map(Some(self.entries.len()))?; + for (key, value) in self.entries { + map.serialize_entry(key, value)?; + } + map.end() + } + } + + #[test] + fn test_serializer_sort_map_keys_toggle() { + let entries = [("b", 1u8), ("a", 2u8), ("c", 3u8)]; + let unordered = UnorderedMap { entries: &entries }; + + let mut ser = Serializer::new(Vec::new()); + unordered.serialize(&mut ser).unwrap(); + let output = String::from_utf8(ser.into_inner()).unwrap(); + let expected_default = r#"{"b":1,"a":2,"c":3}"#; + assert_eq!(output, expected_default); + + let mut ser = Serializer::new(Vec::new()).sort_map_keys(); + unordered.serialize(&mut ser).unwrap(); + let output = String::from_utf8(ser.into_inner()).unwrap(); + assert_eq!(output, r#"{"a":2,"b":1,"c":3}"#); + } + + #[test] + fn test_value_to_string_sort_behavior() { + let value: Value = crate::json!({"b": 1, "a": 2, "c": 3}); + + let json = to_string(&value).unwrap(); + let expected_sorted = r#"{"a":2,"b":1,"c":3}"#; + + if cfg!(feature = "sort_keys") { + assert_eq!(json, expected_sorted); + } else { + let parsed: serde_json::Value = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed, serde_json::json!({"b": 1, "a": 2, "c": 3})); + } + + let mut ser = Serializer::new(Vec::new()).sort_map_keys(); + value.serialize(&mut ser).unwrap(); + let sorted_json = String::from_utf8(ser.into_inner()).unwrap(); + assert_eq!(sorted_json, expected_sorted); + } + + #[test] + fn test_value_serializer_sort_map_keys() { + let value: Value = crate::json!({"delta": 4, "beta": 2, "alpha": 1}); + + let mut ser = Serializer::new(Vec::new()); + value.serialize(&mut ser).unwrap(); + let default_json = String::from_utf8(ser.into_inner()).unwrap(); + + if cfg!(feature = "sort_keys") { + assert_eq!(default_json, r#"{"alpha":1,"beta":2,"delta":4}"#); + } else { + let parsed: serde_json::Value = serde_json::from_str(&default_json).unwrap(); + assert_eq!( + parsed, + serde_json::json!({"delta": 4, "beta": 2, "alpha": 1}) + ); + } + + let mut ser = Serializer::new(Vec::new()).sort_map_keys(); + value.serialize(&mut ser).unwrap(); + let sorted = String::from_utf8(ser.into_inner()).unwrap(); + assert_eq!(sorted, r#"{"alpha":1,"beta":2,"delta":4}"#); + } + #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Foo { name: FastStr, diff --git a/src/serde/ser.rs b/src/serde/ser.rs index 0e3ff7d..7933e3f 100644 --- a/src/serde/ser.rs +++ b/src/serde/ser.rs @@ -6,16 +6,17 @@ use core::{ fmt::{self, Display}, num::FpCategory, }; -use std::io; +use std::{io, str}; use faststr::FastStr; use serde::{ de::Unexpected, - ser::{self, Impossible, Serialize}, + ser::{self, Impossible, Serialize, Serializer as SerdeSerializer}, }; use super::de::tri; use crate::{ + config::SerializeCfg, error::{Error, ErrorCode, Result}, format::{CompactFormatter, Formatter, PrettyFormatter}, lazyvalue::value::HasEsc, @@ -28,6 +29,7 @@ pub struct Serializer { formatter: F, // TODO: record has_escape to optimize lazyvalue // has_escape: bool, + cfg: SerializeCfg, } impl Serializer @@ -61,7 +63,33 @@ where /// specified. #[inline] pub fn with_formatter(writer: W, formatter: F) -> Self { - Serializer { writer, formatter } + Serializer { + writer, + formatter, + cfg: SerializeCfg::default(), + } + } + + /// Enable sorting map keys before serialization. + /// + /// # Examples + /// ``` + /// use serde::Serialize; + /// use sonic_rs::{Serializer, json}; + /// let mut ser = Serializer::new(Vec::new()).sort_map_keys(); + /// let value = json!({"b": 1, "a": 2, "c": 3}); + /// value.serialize(&mut ser).unwrap(); + /// assert_eq!(ser.into_inner(), br#"{"a":2,"b":1,"c":3}"#); + /// ``` + #[inline] + pub fn sort_map_keys(mut self) -> Self { + self.cfg.sort_map_keys = true; + self + } + + pub(crate) fn with_cfg(mut self, cfg: SerializeCfg) -> Self { + self.cfg = cfg; + self } /// Unwrap the `Writer` from the `Serializer`. @@ -74,7 +102,7 @@ where impl<'a, W, F> ser::Serializer for &'a mut Serializer where W: WriteExt, - F: Formatter, + F: Formatter + Clone, { type Ok = (); type Error = Error; @@ -309,12 +337,12 @@ where .map_err(Error::io)); Ok(Compound::Map { ser: self, - state: State::Empty, + state: MapState::Stream(State::Empty), }) } else { Ok(Compound::Map { ser: self, - state: State::First, + state: MapState::Stream(State::First), }) } } @@ -367,19 +395,27 @@ where .formatter .begin_object(&mut self.writer) .map_err(Error::io)); - if len == Some(0) { + if self.cfg.sort_map_keys { + Ok(Compound::Map { + ser: self, + state: MapState::Sorted { + entries: Vec::with_capacity(len.unwrap_or(0)), + next_key: None, + }, + }) + } else if len == Some(0) { tri!(self .formatter .end_object(&mut self.writer) .map_err(Error::io)); Ok(Compound::Map { ser: self, - state: State::Empty, + state: MapState::Stream(State::Empty), }) } else { Ok(Compound::Map { ser: self, - state: State::First, + state: MapState::Stream(State::First), }) } } @@ -492,7 +528,7 @@ pub enum State { pub enum Compound<'a, W: 'a, F: 'a> { Map { ser: &'a mut Serializer, - state: State, + state: MapState, }, RawValue { @@ -500,6 +536,57 @@ pub enum Compound<'a, W: 'a, F: 'a> { }, } +pub enum MapState { + Stream(State), + Sorted { + entries: Vec<(String, Vec)>, + next_key: Option, + }, +} + +fn write_sorted_entries( + ser: &mut Serializer, + mut entries: Vec<(String, Vec)>, +) -> Result<()> +where + W: WriteExt, + F: Formatter, +{ + entries.sort_by(|a, b| a.0.cmp(&b.0)); + + let mut first = true; + for (key, value_buf) in entries.into_iter() { + tri!(ser + .formatter + .begin_object_key(&mut ser.writer, first) + .map_err(Error::io)); + first = false; + + tri!(SerdeSerializer::serialize_str(&mut *ser, &key)); + + tri!(ser + .formatter + .end_object_key(&mut ser.writer) + .map_err(Error::io)); + + tri!(ser + .formatter + .begin_object_value(&mut ser.writer) + .map_err(Error::io)); + let raw = unsafe { str::from_utf8_unchecked(&value_buf) }; + tri!(ser + .formatter + .write_raw_value(&mut ser.writer, raw) + .map_err(Error::io)); + tri!(ser + .formatter + .end_object_value(&mut ser.writer) + .map_err(Error::io)); + } + + ser.formatter.end_object(&mut ser.writer).map_err(Error::io) +} + impl<'a, W, F> ser::SerializeSeq for Compound<'a, W, F> where W: WriteExt, @@ -514,17 +601,21 @@ where T: ?Sized + Serialize, { match self { - Compound::Map { ser, state } => { - tri!(ser - .formatter - .begin_array_value(&mut ser.writer, *state == State::First) - .map_err(Error::io)); - *state = State::Rest; - tri!(value.serialize(&mut **ser)); - ser.formatter - .end_array_value(&mut ser.writer) - .map_err(Error::io) - } + Compound::Map { ser, state } => match state { + MapState::Stream(ref mut map_state) => { + tri!(ser + .formatter + .begin_array_value(&mut ser.writer, *map_state == State::First) + .map_err(Error::io)); + *map_state = State::Rest; + tri!(value.serialize(&mut **ser)); + ser.formatter + .end_array_value(&mut ser.writer) + .map_err(Error::io) + } + + MapState::Sorted { .. } => unreachable!(), + }, Compound::RawValue { .. } => unreachable!(), } @@ -534,8 +625,12 @@ where fn end(self) -> Result<()> { match self { Compound::Map { ser, state } => match state { - State::Empty => Ok(()), - _ => ser.formatter.end_array(&mut ser.writer).map_err(Error::io), + MapState::Stream(map_state) => match map_state { + State::Empty => Ok(()), + _ => ser.formatter.end_array(&mut ser.writer).map_err(Error::io), + }, + + MapState::Sorted { .. } => unreachable!(), }, Compound::RawValue { .. } => unreachable!(), @@ -606,17 +701,24 @@ where #[inline] fn end(self) -> Result<()> { match self { - Compound::Map { ser, state } => { - match state { - State::Empty => {} - _ => tri!(ser.formatter.end_array(&mut ser.writer).map_err(Error::io)), + Compound::Map { ser, state } => match state { + MapState::Stream(map_state) => { + match map_state { + State::Empty => {} + _ => tri!(ser.formatter.end_array(&mut ser.writer).map_err(Error::io)), + } + tri!(ser + .formatter + .end_object_value(&mut ser.writer) + .map_err(Error::io)); + ser.formatter.end_object(&mut ser.writer).map_err(Error::io) } - tri!(ser - .formatter - .end_object_value(&mut ser.writer) - .map_err(Error::io)); - ser.formatter.end_object(&mut ser.writer).map_err(Error::io) - } + + MapState::Sorted { entries, next_key } => { + debug_assert!(next_key.is_none()); + write_sorted_entries(ser, entries) + } + }, Compound::RawValue { .. } => unreachable!(), } @@ -626,7 +728,7 @@ where impl<'a, W, F> ser::SerializeMap for Compound<'a, W, F> where W: WriteExt, - F: Formatter, + F: Formatter + Clone, { type Ok = (); type Error = Error; @@ -637,19 +739,27 @@ where T: ?Sized + Serialize, { match self { - Compound::Map { ser, state } => { - tri!(ser - .formatter - .begin_object_key(&mut ser.writer, *state == State::First) - .map_err(Error::io)); - *state = State::Rest; - - tri!(key.serialize(MapKeySerializer { ser: *ser })); - - ser.formatter - .end_object_key(&mut ser.writer) - .map_err(Error::io) - } + Compound::Map { ser, state } => match state { + MapState::Stream(ref mut map_state) => { + tri!(ser + .formatter + .begin_object_key(&mut ser.writer, *map_state == State::First) + .map_err(Error::io)); + *map_state = State::Rest; + + tri!(key.serialize(MapKeySerializer { ser: *ser })); + + ser.formatter + .end_object_key(&mut ser.writer) + .map_err(Error::io) + } + + MapState::Sorted { next_key, .. } => { + let key_str = tri!(key.serialize(SortedKeySerializer)); + *next_key = Some(key_str); + Ok(()) + } + }, Compound::RawValue { .. } => unreachable!(), } @@ -661,16 +771,31 @@ where T: ?Sized + Serialize, { match self { - Compound::Map { ser, .. } => { - tri!(ser - .formatter - .begin_object_value(&mut ser.writer) - .map_err(Error::io)); - tri!(value.serialize(&mut **ser)); - ser.formatter - .end_object_value(&mut ser.writer) - .map_err(Error::io) - } + Compound::Map { ser, state } => match state { + MapState::Stream(_) => { + tri!(ser + .formatter + .begin_object_value(&mut ser.writer) + .map_err(Error::io)); + tri!(value.serialize(&mut **ser)); + ser.formatter + .end_object_value(&mut ser.writer) + .map_err(Error::io) + } + + MapState::Sorted { entries, next_key } => { + let key = next_key + .take() + .expect("serialize_value called before serialize_key"); + let mut entry_ser = + Serializer::with_formatter(Vec::with_capacity(128), ser.formatter.clone()) + .with_cfg(ser.cfg); + tri!(value.serialize(&mut entry_ser)); + let stored = entry_ser.into_inner(); + entries.push((key, stored)); + Ok(()) + } + }, Compound::RawValue { .. } => unreachable!(), } @@ -680,8 +805,15 @@ where fn end(self) -> Result<()> { match self { Compound::Map { ser, state } => match state { - State::Empty => Ok(()), - _ => ser.formatter.end_object(&mut ser.writer).map_err(Error::io), + MapState::Stream(map_state) => match map_state { + State::Empty => Ok(()), + _ => ser.formatter.end_object(&mut ser.writer).map_err(Error::io), + }, + + MapState::Sorted { entries, next_key } => { + debug_assert!(next_key.is_none()); + write_sorted_entries(ser, entries) + } }, Compound::RawValue { .. } => unreachable!(), @@ -748,17 +880,24 @@ where #[inline] fn end(self) -> Result<()> { match self { - Compound::Map { ser, state } => { - match state { - State::Empty => {} - _ => tri!(ser.formatter.end_object(&mut ser.writer).map_err(Error::io)), + Compound::Map { ser, state } => match state { + MapState::Stream(map_state) => { + match map_state { + State::Empty => {} + _ => tri!(ser.formatter.end_object(&mut ser.writer).map_err(Error::io)), + } + tri!(ser + .formatter + .end_object_value(&mut ser.writer) + .map_err(Error::io)); + ser.formatter.end_object(&mut ser.writer).map_err(Error::io) } - tri!(ser - .formatter - .end_object_value(&mut ser.writer) - .map_err(Error::io)); - ser.formatter.end_object(&mut ser.writer).map_err(Error::io) - } + + MapState::Sorted { entries, next_key } => { + debug_assert!(next_key.is_none()); + write_sorted_entries(ser, entries) + } + }, Compound::RawValue { .. } => unreachable!(), } @@ -769,6 +908,209 @@ struct MapKeySerializer<'a, W: 'a, F: 'a> { ser: &'a mut Serializer, } +struct SortedKeySerializer; + +impl serde::Serializer for SortedKeySerializer { + type Ok = String; + type Error = Error; + + type SerializeSeq = Impossible; + type SerializeTuple = Impossible; + type SerializeTupleStruct = Impossible; + type SerializeTupleVariant = Impossible; + type SerializeMap = Impossible; + type SerializeStruct = Impossible; + type SerializeStructVariant = Impossible; + + #[inline] + fn serialize_bool(self, value: bool) -> Result { + Ok(if value { "true" } else { "false" }.to_owned()) + } + + #[inline] + fn serialize_i8(self, value: i8) -> Result { + self.serialize_i64(value as i64) + } + + #[inline] + fn serialize_i16(self, value: i16) -> Result { + self.serialize_i64(value as i64) + } + + #[inline] + fn serialize_i32(self, value: i32) -> Result { + self.serialize_i64(value as i64) + } + + fn serialize_i64(self, value: i64) -> Result { + let mut buf = itoa::Buffer::new(); + Ok(buf.format(value).to_owned()) + } + + fn serialize_i128(self, value: i128) -> Result { + Ok(value.to_string()) + } + + #[inline] + fn serialize_u8(self, value: u8) -> Result { + self.serialize_u64(value as u64) + } + + #[inline] + fn serialize_u16(self, value: u16) -> Result { + self.serialize_u64(value as u64) + } + + #[inline] + fn serialize_u32(self, value: u32) -> Result { + self.serialize_u64(value as u64) + } + + fn serialize_u64(self, value: u64) -> Result { + let mut buf = itoa::Buffer::new(); + Ok(buf.format(value).to_owned()) + } + + fn serialize_u128(self, value: u128) -> Result { + Ok(value.to_string()) + } + + fn serialize_f32(self, value: f32) -> Result { + if value.is_finite() { + let mut buf = ryu::Buffer::new(); + Ok(buf.format_finite(value).to_owned()) + } else { + Err(key_must_be_str_or_num(Unexpected::Other( + "NaN or Infinite f32", + ))) + } + } + + fn serialize_f64(self, value: f64) -> Result { + if value.is_finite() { + let mut buf = ryu::Buffer::new(); + Ok(buf.format_finite(value).to_owned()) + } else { + Err(key_must_be_str_or_num(Unexpected::Other( + "NaN or Infinite f64", + ))) + } + } + + #[inline] + fn serialize_char(self, value: char) -> Result { + Ok(value.to_string()) + } + + #[inline] + fn serialize_str(self, value: &str) -> Result { + Ok(value.to_owned()) + } + + fn serialize_bytes(self, _value: &[u8]) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other("bytes"))) + } + + fn serialize_unit(self) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other("unit"))) + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other(name))) + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + Ok(variant.to_owned()) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: ?Sized + Serialize, + { + Err(key_must_be_str_or_num(Unexpected::NewtypeVariant)) + } + + fn serialize_none(self) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other("none"))) + } + + fn serialize_some(self, value: &T) -> Result + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_seq(self, _len: Option) -> Result { + Err(key_must_be_str_or_num(Unexpected::Seq)) + } + + fn serialize_tuple(self, _len: usize) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other("tuple"))) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other("tuple_struct"))) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(key_must_be_str_or_num(Unexpected::TupleVariant)) + } + + fn serialize_map(self, _len: Option) -> Result { + Err(key_must_be_str_or_num(Unexpected::Map)) + } + + fn serialize_struct(self, name: &'static str, _len: usize) -> Result { + Err(key_must_be_str_or_num(Unexpected::Other(name))) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(key_must_be_str_or_num(Unexpected::StructVariant)) + } + + fn collect_str(self, value: &T) -> Result + where + T: ?Sized + Display, + { + Ok(value.to_string()) + } +} + // TODO: fix the error info fn invalid_raw_value() -> Error { Error::ser_error(ErrorCode::InvalidJsonValue) @@ -1243,7 +1585,7 @@ where W: WriteExt, T: ?Sized + Serialize, { - let mut ser = Serializer::pretty(writer); + let mut ser = Serializer::with_formatter(writer, PrettyFormatter::new()); value.serialize(&mut ser) } diff --git a/src/value/ser.rs b/src/value/ser.rs index 3b760c6..c39d8aa 100644 --- a/src/value/ser.rs +++ b/src/value/ser.rs @@ -879,8 +879,6 @@ mod test { #[test] fn test_to_value() { - use crate::{json, to_value, Value}; - let user = User { string: "hello".into(), number: 123,