From 46833cb59aa2752872d3e71030d5937d8ed1286c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 30 Dec 2025 13:23:14 -0800 Subject: [PATCH 1/2] perf: optimize left function by eliminating double chars() iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For negative n values, the function was calling string.chars() twice: 1. Once to count total characters 2. Again to take the prefix This optimization collects chars into a reusable buffer once per row for the negative n case, eliminating the redundant iteration. Benchmark results (negative n, which triggers the optimization): - size=1024: 71.323 µs → 52.760 µs (26.0% faster) - size=4096: 289.62 µs → 212.23 µs (26.7% faster) Benchmark results (positive n, minimal overhead): - size=1024: 24.465 µs → 24.691 µs (0.9% slower) - size=4096: 96.129 µs → 97.078 µs (1.0% slower) The dramatic improvement for negative n cases far outweighs the negligible overhead for positive n cases. --- datafusion/functions/src/unicode/left.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index ecff8f8699506..6e07ea76d5593 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -139,14 +139,22 @@ fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( n_array: &Int64Array, ) -> Result { let iter = ArrayIter::new(string_array); + let mut chars_buf = Vec::new(); let result = iter .zip(n_array.iter()) .map(|(string, n)| match (string, n) { (Some(string), Some(n)) => match n.cmp(&0) { Ordering::Less => { - let len = string.chars().count() as i64; + // Collect chars once and reuse for both count and take + chars_buf.clear(); + chars_buf.extend(string.chars()); + let len = chars_buf.len() as i64; + Some(if n.abs() < len { - string.chars().take((len + n) as usize).collect::<String>() + chars_buf + .iter() + .take((len + n) as usize) 
.collect::<String>() } else { "".to_string() }) From adee2ed6187be9c052944547a29f7976cdb107ed Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 4 Jan 2026 13:10:50 -0800 Subject: [PATCH 2/2] fix: Avoid abs() panic on i64::MIN in left() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced `n.abs() < len` with `n > -len` to avoid panic/wrap on i64::MIN. The original code used .abs() on a negative i64 value, which causes: - Panic in debug builds when n = i64::MIN (overflow check) - Wrapping in release builds: abs() returns i64::MIN unchanged, so `n.abs() < len` wrongly passes (defined behavior in Rust, but an incorrect result) The new condition `n > -len` is mathematically equivalent: - When n < 0: n.abs() = -n - So: -n < len is equivalent to n > -len This handles the i64::MIN edge case safely without performance impact, as it's just a comparison with negated value instead of abs(). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- datafusion/functions/src/unicode/left.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index 6e07ea76d5593..db27d900b6828 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -150,7 +150,8 @@ fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( chars_buf.extend(string.chars()); let len = chars_buf.len() as i64; - Some(if n.abs() < len { + // For negative n, take (len + n) chars if n > -len (avoiding abs() which panics on i64::MIN) + Some(if n > -len { chars_buf .iter() .take((len + n) as usize)