diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
index 8e8f7c6..3e25ff0 100644
--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@@ -62,5 +62,9 @@ name = "get_from"
 harness = false
 name = "value_operator"
 
+[[bench]]
+harness = false
+name = "object_get_optimization"
+
 [features]
 default = []
diff --git a/benchmarks/benches/object_get_optimization.rs b/benchmarks/benches/object_get_optimization.rs
new file mode 100644
index 0000000..1a37ded
--- /dev/null
+++ b/benchmarks/benches/object_get_optimization.rs
@@ -0,0 +1,221 @@
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use sonic_rs::{Object, Value, from_str};
+
+fn create_small_object(size: usize) -> Object {
+    let mut obj = Object::new();
+    for i in 0..size {
+        obj.insert(&format!("key{:02}", i), Value::from(i));
+    }
+    obj
+}
+
+fn create_medium_object(size: usize) -> Object {
+    let mut obj = Object::new();
+    for i in 0..size {
+        // Use longer keys to benefit from SIMD optimization
+        obj.insert(&format!("medium_key_name_{:03}", i), Value::from(i));
+    }
+    obj
+}
+
+fn create_large_object(size: usize) -> Object {
+    let mut obj = Object::new();
+    for i in 0..size {
+        obj.insert(&format!("large_object_key_{:04}", i), Value::from(i * 10));
+    }
+    obj
+}
+
+fn bench_small_objects(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("small_objects_1_to_7_keys");
+
+    for size in [1, 3, 5, 7] {
+        let obj = create_small_object(size);
+        let test_key = format!("key{:02}", size / 2); // middle key
+
+        group.throughput(Throughput::Elements(1));
+        group.bench_with_input(
+            BenchmarkId::new("optimized_get", size),
+            &(obj, test_key),
+            |b, (obj, key)| {
+                b.iter(|| obj.get(key))
+            }
+        );
+    }
+    group.finish();
+}
+
+fn bench_medium_objects(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("medium_objects_8_to_31_keys");
+
+    for size in [8, 15, 20, 31] {
+        let obj = create_medium_object(size);
+        let test_key = format!("medium_key_name_{:03}", size / 2); // middle key
+
+        group.throughput(Throughput::Elements(1));
+        group.bench_with_input(
+            BenchmarkId::new("simd_optimized_get", size),
+            &(obj, test_key),
+            |b, (obj, key)| {
+                b.iter(|| obj.get(key))
+            }
+        );
+    }
+    group.finish();
+}
+
+fn bench_large_objects(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("large_objects_32_plus_keys");
+
+    for size in [32, 50, 100, 200] {
+        let obj = create_large_object(size);
+        let test_key = format!("large_object_key_{:04}", size / 2); // middle key
+
+        group.throughput(Throughput::Elements(1));
+        group.bench_with_input(
+            BenchmarkId::new("hash_index_get", size),
+            &(obj, test_key),
+            |b, (obj, key)| {
+                b.iter(|| obj.get(key))
+            }
+        );
+    }
+    group.finish();
+}
+
+fn bench_different_key_positions(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("key_position_impact");
+
+    let obj = create_large_object(100);
+
+    // Test first, middle, and last key positions
+    let positions = [
+        ("first", "large_object_key_0000"),
+        ("middle", "large_object_key_0050"),
+        ("last", "large_object_key_0099"),
+    ];
+
+    for (pos_name, key) in positions {
+        group.bench_with_input(
+            BenchmarkId::new("get_by_position", pos_name),
+            &key,
+            |b, key| {
+                b.iter(|| obj.get(key))
+            }
+        );
+    }
+    group.finish();
+}
+
+fn bench_cache_behavior(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("cache_behavior");
+
+    let obj = create_large_object(100);
+    let test_key = "large_object_key_0050";
+
+    group.bench_function("repeated_lookups", |b| {
+        b.iter(|| {
+            // Perform multiple lookups to test cache effectiveness
+            for _ in 0..10 {
+                obj.get(&test_key);
+            }
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_key_length_impact(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("key_length_impact");
+
+    // Test with different key lengths to evaluate SIMD effectiveness
+    let test_cases = [
+        ("short", 4, "k"),
+        ("medium", 16, "medium_length_key"),
+        ("long", 32, "very_long_key_name_that_should_benefit_from_simd"),
+    ];
+
+    for (name, obj_size, key_prefix) in test_cases {
+        let mut obj = Object::new();
+        for i in 0..obj_size {
+            let key = format!("{}_{:03}", key_prefix, i);
+            obj.insert(&key, Value::from(i));
+        }
+
+        let test_key = format!("{}_{:03}", key_prefix, obj_size / 2);
+
+        group.bench_with_input(
+            BenchmarkId::new("get_by_key_length", name),
+            &(obj, test_key),
+            |b, (obj, key)| {
+                b.iter(|| obj.get(key))
+            }
+        );
+    }
+    group.finish();
+}
+
+fn bench_real_world_patterns(c: &mut Criterion) {
+    let core_ids = core_affinity::get_core_ids().unwrap();
+    core_affinity::set_for_current(core_ids[0]);
+
+    let mut group = c.benchmark_group("real_world_patterns");
+
+    // Simulate common JSON patterns
+    let json_configs = [
+        ("api_response", r#"{"status": "success", "data": {"id": 123, "name": "John", "email": "john@example.com"}, "timestamp": "2024-01-01T00:00:00Z", "version": "1.0"}"#),
+        ("user_profile", r#"{"userId": 12345, "username": "johndoe", "firstName": "John", "lastName": "Doe", "email": "john.doe@example.com", "birthDate": "1990-01-01", "isActive": true, "roles": ["user", "admin"], "preferences": {"theme": "dark", "language": "en"}, "lastLogin": "2024-01-01T12:00:00Z"}"#),
+    ];
+
+    for (name, json) in json_configs {
+        let obj: Object = from_str(json).unwrap();
+
+        // Test common access patterns
+        let test_keys = match name {
+            "api_response" => vec!["status", "data", "timestamp"],
+            "user_profile" => vec!["userId", "email", "isActive", "preferences"],
+            _ => vec!["status"],
+        };
+
+        for key in test_keys {
+            group.bench_with_input(
+                BenchmarkId::new(name, key),
+                &(obj.clone(), key),
+                |b, (obj, key)| {
+                    b.iter(|| obj.get(key))
+                }
+            );
+        }
+    }
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_small_objects,
+    bench_medium_objects,
+    bench_large_objects,
+    bench_different_key_positions,
+    bench_cache_behavior,
+    bench_key_length_impact,
+    bench_real_world_patterns
+);
+criterion_main!(benches);
\ No newline at end of file
diff --git a/src/value/node.rs b/src/value/node.rs
index 380f11d..5f63ee5 100644
--- a/src/value/node.rs
+++ b/src/value/node.rs
@@ -1,14 +1,17 @@
 use core::mem::size_of;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
 #[cfg(feature = "sort_keys")]
 use std::collections::BTreeMap;
 use std::{
     alloc::Layout,
+    collections::HashMap,
     fmt::{Debug, Display, Formatter},
     mem::{transmute, ManuallyDrop},
     ptr::NonNull,
     slice::from_raw_parts,
     str::from_utf8_unchecked,
-    sync::Arc,
+    sync::{Arc, LazyLock, Mutex},
 };
 
 #[cfg(not(feature = "sort_keys"))]
@@ -17,6 +20,14 @@ use faststr::FastStr;
 use ref_cast::RefCast;
 use serde::ser::{Serialize, SerializeMap, SerializeSeq};
 
+// Type aliases to reduce complexity
+type HashIndex = HashMap<String, usize>;
+type CacheEntry = (HashIndex, u32);
+type HashIndexCache = HashMap<usize, CacheEntry>;
+
+static HASH_INDEX_CACHE: LazyLock<Mutex<HashIndexCache>> =
+    LazyLock::new(|| Mutex::new(HashMap::new()));
+
 use super::{
     object::Pair,
     shared::Shared,
@@ -1155,6 +1166,11 @@ impl Value {
         self.get_key_value(key).map(|(_, v)| v)
     }
 
+    #[inline]
+    pub(crate) fn get_key_optimized(&self, key: &str) -> Option<&Self> {
+        self.get_key_value_optimized(key).map(|(_, v)| v)
+    }
+
     pub(crate) fn get_key_value(&self, key: &str) -> Option<(&str, &Self)> {
         debug_assert!(self.is_object());
         let ref_inner = self.as_ref2();
@@ -1173,6 +1189,217 @@
         None
     }
 
+    /// Optimized key-value lookup with multi-level adaptive strategies
+    pub(crate) fn get_key_value_optimized(&self, key: &str) -> Option<(&str, &Self)> {
+        debug_assert!(self.is_object());
+        let ref_inner = self.as_ref2();
+
+        if let ValueRefInner::Object(pairs) = ref_inner {
+            let len = pairs.len();
+
+            // Multi-level adaptive optimization strategy
+            match len {
+                0 => None,
+                1..=7 => {
+                    // Small objects: optimized linear search
+                    self.linear_search_small(key, pairs)
+                }
+                8..=31 => {
+                    // Medium objects: SIMD-accelerated linear search
+                    self.simd_search_optimized(key, pairs)
+                }
+                _ => {
+                    // Large objects: hash index + cache
+                    self.large_object_search_with_hash(key, pairs)
+                }
+            }
+        } else if let ValueRefInner::ObjectOwned(kv) = ref_inner {
+            // For owned objects, use the existing hash map lookup
+            if let Some((k, v)) = kv.get_key_value(key) {
+                return Some((k.as_str(), v));
+            }
+            None
+        } else {
+            None
+        }
+    }
+
+    /// Hash index search for large objects (32 or more keys)
+    fn large_object_search_with_hash<'a>(
+        &self,
+        key: &str,
+        pairs: &'a [(Value, Value)],
+    ) -> Option<(&'a str, &'a Self)> {
+        let pairs_ptr = pairs.as_ptr() as usize;
+
+        // Try to get or build the hash index
+        if let Ok(mut cache) = HASH_INDEX_CACHE.lock() {
+            let entry = cache.entry(pairs_ptr).or_insert_with(|| {
+                // Build a hash index for this object
+                let mut hash_index = HashMap::with_capacity(pairs.len());
+                for (i, (k, _)) in pairs.iter().enumerate() {
+                    if let Some(k_str) = k.as_str() {
+                        // For duplicate keys, keep the first occurrence (consistent with linear search)
+                        hash_index.entry(k_str.to_string()).or_insert(i);
+                    }
+                }
+                (hash_index, 1) // (hash_index, access_count)
+            });
+
+            // Increment access count
+            entry.1 += 1;
+
+            // Use hash index for lookup
+            if let Some(&index) = entry.0.get(key) {
+                if index < pairs.len() {
+                    if let Some(k_str) = pairs[index].0.as_str() {
+                        if k_str == key {
+                            return Some((k_str, &pairs[index].1));
+                        }
+                    }
+                }
+            }
+
+            // Clean up the cache if it gets too large (simple LRU-like cleanup)
+            if cache.len() > 100 {
+                cache.retain(|_, (_, access_count)| *access_count > 1);
+                for (_, access_count) in cache.values_mut() {
+                    *access_count = (*access_count).saturating_sub(1);
+                }
+            }
+        }
+
+        // Fall back to SIMD search if the hash index lookup fails
+        self.simd_search_optimized(key, pairs)
+    }
+
+    /// Optimized linear search for small objects
+    #[inline]
+    fn linear_search_small<'a>(
+        &self,
+        key: &str,
+        pairs: &'a [(Value, Value)],
+    ) -> Option<(&'a str, &'a Self)> {
+        let key_len = key.len();
+
+        // Length pre-check optimization for small objects
+        for (k, v) in pairs {
+            if let Some(k_str) = k.as_str() {
+                // Length pre-check before string comparison
+                if k_str.len() == key_len && k_str == key {
+                    return Some((k_str, v));
+                }
+            }
+        }
+        None
+    }
+
+    /// SIMD-accelerated search for medium and large objects
+    #[inline]
+    fn simd_search_optimized<'a>(
+        &self,
+        key: &str,
+        pairs: &'a [(Value, Value)],
+    ) -> Option<(&'a str, &'a Self)> {
+        let key_bytes = key.as_bytes();
+
+        // Try SIMD optimization for longer keys
+        if key_bytes.len() >= 16 {
+            if let Some(result) = self.simd_string_compare(key, pairs) {
+                return Some(result);
+            }
+        }
+
+        // Fallback to optimized linear search
+        self.linear_search_optimized(key, pairs)
+    }
+
+    /// SIMD string comparison for keys >= 16 bytes
+    #[cfg(target_arch = "x86_64")]
+    fn simd_string_compare<'a>(
+        &self,
+        key: &str,
+        pairs: &'a [(Value, Value)],
+    ) -> Option<(&'a str, &'a Self)> {
+        if !is_x86_feature_detected!("sse2") {
+            return None;
+        }
+
+        let key_bytes = key.as_bytes();
+        let key_len = key_bytes.len();
+
+        unsafe {
+            // Load first 16 bytes of the key for SIMD comparison
+            let key_vec = if key_len >= 16 {
+                _mm_loadu_si128(key_bytes.as_ptr() as *const __m128i)
+            } else {
+                // Pad with zeros for shorter keys
+                let mut padded = [0u8; 16];
+                padded[..key_len].copy_from_slice(key_bytes);
+                _mm_loadu_si128(padded.as_ptr() as *const __m128i)
+            };
+
+            for (k, v) in pairs {
+                if let Some(k_str) = k.as_str() {
+                    let k_bytes = k_str.as_bytes();
+
+                    // Quick length check
+                    if k_bytes.len() != key_len {
+                        continue;
+                    }
+
+                    if k_bytes.len() >= 16 {
+                        // SIMD comparison for the first 16 bytes
+                        let k_vec = _mm_loadu_si128(k_bytes.as_ptr() as *const __m128i);
+                        let cmp = _mm_cmpeq_epi8(key_vec, k_vec);
+                        let mask = _mm_movemask_epi8(cmp);
+
+                        if mask == 0xFFFF {
+                            // First 16 bytes match, check remaining bytes
+                            if key_len <= 16 || key_bytes[16..] == k_bytes[16..] {
+                                return Some((k_str, v));
+                            }
+                        }
+                    } else if key_bytes == k_bytes {
+                        return Some((k_str, v));
+                    }
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Fallback for non-x86_64 architectures: no SIMD path is available, so
+    /// callers always use the optimized linear search instead
+    #[cfg(not(target_arch = "x86_64"))]
+    fn simd_string_compare<'a>(
+        &self,
+        _key: &str,
+        _pairs: &'a [(Value, Value)],
+    ) -> Option<(&'a str, &'a Self)> {
+        None
+    }
+
+    /// Optimized linear search with length pre-check
+    #[inline]
+    fn linear_search_optimized<'a>(
+        &self,
+        key: &str,
+        pairs: &'a [(Value, Value)],
+    ) -> Option<(&'a str, &'a Self)> {
+        let key_len = key.len();
+
+        for (k, v) in pairs {
+            if let Some(k_str) = k.as_str() {
+                // Length pre-check before string comparison
+                if k_str.len() == key_len && k_str == key {
+                    return Some((k_str, v));
+                }
+            }
+        }
+        None
+    }
+
     #[inline]
     pub(crate) fn get_key_mut(&mut self, key: &str) -> Option<&mut Self> {
         if let ValueMut::Object(kv) = self.as_mut() {
diff --git a/src/value/object.rs b/src/value/object.rs
index 35d30dc..1b386a0 100644
--- a/src/value/object.rs
+++ b/src/value/object.rs
@@ -139,7 +139,7 @@ impl Object {
     /// ```
     #[inline]
     pub fn get<Q: AsRef<str>>(&self, key: &Q) -> Option<&Value> {
-        self.0.get_key(key.as_ref())
+        self.0.get_key_optimized(key.as_ref())
     }
 
     /// Returns `true` if the map contains a value for the specified key.
diff --git a/src/value/ser.rs b/src/value/ser.rs
index 3b760c6..dd687f1 100644
--- a/src/value/ser.rs
+++ b/src/value/ser.rs
@@ -879,7 +879,7 @@ mod test {
     #[test]
     fn test_to_value() {
-        use crate::{json, to_value, Value};
+        use crate::{to_value, Value};
 
         let user = User {
             string: "hello".into(),
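For context, here is a minimal usage sketch (not part of the diff) that drives the new lookup strategies through the public `Object::get` API touched above. The key names and object sizes are illustrative and mirror the benchmark's own patterns; it assumes only the `Object`, `Value`, `insert`, and `get` APIs already used in the benchmark file.

use sonic_rs::{Object, Value};

fn main() {
    // Small object (1-7 keys): served by the linear search with length pre-check.
    let mut small = Object::new();
    for i in 0..3usize {
        small.insert(&format!("key{:02}", i), Value::from(i));
    }
    assert!(small.get(&"key01").is_some());

    // Large object (32+ keys): served by the cached hash-index path.
    let mut large = Object::new();
    for i in 0..64usize {
        large.insert(&format!("large_object_key_{:04}", i), Value::from(i));
    }
    assert!(large.get(&"large_object_key_0032").is_some());
    assert!(large.get(&"missing_key").is_none());
}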