From b4953b4e33f013dbb7fc4f7eb5dc8de3238e9d2a Mon Sep 17 00:00:00 2001 From: Dunqing <29533304+Dunqing@users.noreply.github.com> Date: Sat, 3 May 2025 13:03:18 +0000 Subject: [PATCH] perf(allocator/vec2): resolve performance regression for `extend` by marking reserve as `#[cold]` and `#[inline(never)]` (#10675) I guess the reason for the performance regression is that the current implementation executes more instructions than before: it uses the lower bound of `size_hint` to reserve space, which bloats the loop body. Also, the `for` loop is easier for the compiler to optimize. The `reserve` path inside `extend` is rarely taken, so this change marks it as `#[cold]` and `#[inline(never)]`, which reduces the number of instructions in the `while` loop. We got a 3%-4% performance improvement in the `minifier`, but the transformer performance did not fully get back to where it was before #10670. Anyway, I think we can accept a performance regression of less than 1%; this change unblocks us from pushing forward with the `Vec` improvements, and we will win the remainder back at the end of the stack! 
See #9856 --- crates/oxc_allocator/src/vec2/mod.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/oxc_allocator/src/vec2/mod.rs b/crates/oxc_allocator/src/vec2/mod.rs index 0db874c9e2664..2847fa4c2f0cc 100644 --- a/crates/oxc_allocator/src/vec2/mod.rs +++ b/crates/oxc_allocator/src/vec2/mod.rs @@ -2281,7 +2281,8 @@ impl<'a, 'bump, T> IntoIterator for &'a mut Vec<'bump, T> { impl<'bump, T: 'bump> Extend for Vec<'bump, T> { #[inline] fn extend>(&mut self, iter: I) { - self.extend_desugared(iter.into_iter()); + let iterator = iter.into_iter(); + self.extend_desugared(iterator); } } @@ -2300,8 +2301,18 @@ impl<'bump, T: 'bump> Vec<'bump, T> { while let Some(element) = iterator.next() { let len = self.len(); if len == self.capacity() { - let (lower, _) = iterator.size_hint(); - self.reserve(lower.saturating_add(1)); + // This reallocation path is rarely taken, especially with prior reservation, + // so mark it `#[cold]` and `#[inline(never)]` helps the compiler optimize the + // common case, and prevents this cold path from being inlined to the `while` loop, + // which increases the execution instructions and hits the performance. + #[cold] + #[inline(never)] + fn reserve_slow(v: &mut Vec, iterator: &impl Iterator) { + let (lower, _) = iterator.size_hint(); + v.reserve(lower.saturating_add(1)); + } + + reserve_slow(self, &iterator); } unsafe { ptr::write(self.as_mut_ptr().add(len), element);