From 79aa9b15d7403ce2dc40b525a1d16e6c4ad1973c Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Sun, 13 Sep 2020 11:58:43 -0700 Subject: [PATCH] Optimize behavior of vec.split_off(0) (take all) Optimization improvement to `split_off()` so the performance meets the intuitively expected behavior when `at == 0`, avoiding the current behavior of copying the entire vector. The change honors documented behavior that the method leaves the original vector's "previous capacity unchanged". This improvement better supports the pattern for building and flushing a buffer of elements, such as the following: ```rust let mut vec = Vec::new(); loop { vec.push(something); if condition_is_met { process(vec.split_off(0)); } } ``` `Option` wrapping is the first alternative I thought of, but is much less obvious and more verbose: ```rust let mut capacity = 1; let mut vec: Option<Vec<_>> = None; loop { vec.get_or_insert_with(|| Vec::with_capacity(capacity)).push(something); if condition_is_met { capacity = vec.capacity(); process(vec.take().unwrap()); } } ``` Directly applying `mem::replace()` could work, but `mem::` functions are typically a last resort, when a developer is actively seeking better performance than the standard library provides, for example. The benefit of the approach to this change is it does not change the existing API contract, but improves the performance of `split_off(0)` for `Vec`, `String` (which delegates `split_off()` to `Vec`), and any other existing use cases. This change adds tests to validate the behavior of `split_off()` with regard to capacity, as originally documented, and confirm that behavior still holds, when `at == 0`. The change is an implementation detail, and does not require a documentation change, but documenting the new behavior as part of its API contract may benefit future users. (Let me know if I should make that documentation update.) 
Note, for future consideration: I think it would be helpful to introduce an additional method to `Vec` (if not also to `String`): ``` pub fn take_all(&mut self) -> Self { self.split_off(0) } ``` This would make it more clear how `Vec` supports the pattern, and make it easier to find, since the behavior is similar to other `take()` methods in the Rust standard library. --- library/alloc/src/vec.rs | 5 +++++ library/alloc/tests/string.rs | 4 ++++ library/alloc/tests/vec.rs | 14 ++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/library/alloc/src/vec.rs b/library/alloc/src/vec.rs index eba7ffae22c4c..c939a7bb29533 100644 --- a/library/alloc/src/vec.rs +++ b/library/alloc/src/vec.rs @@ -1410,6 +1410,11 @@ impl<T> Vec<T> { assert_failed(at, self.len()); } + if at == 0 { + // the new vector can take over the original buffer and avoid the copy + return mem::replace(self, Vec::with_capacity(self.capacity())); + } + let other_len = self.len - at; let mut other = Vec::with_capacity(other_len); diff --git a/library/alloc/tests/string.rs b/library/alloc/tests/string.rs index 6059bec8c5a3d..f7f78046d089b 100644 --- a/library/alloc/tests/string.rs +++ b/library/alloc/tests/string.rs @@ -278,17 +278,21 @@ fn test_split_off_mid_char() { #[test] fn test_split_off_ascii() { let mut ab = String::from("ABCD"); + let orig_capacity = ab.capacity(); let cd = ab.split_off(2); assert_eq!(ab, "AB"); assert_eq!(cd, "CD"); + assert_eq!(ab.capacity(), orig_capacity); } #[test] fn test_split_off_unicode() { let mut nihon = String::from("日本語"); + let orig_capacity = nihon.capacity(); let go = nihon.split_off("日本".len()); assert_eq!(nihon, "日本"); assert_eq!(go, "語"); + assert_eq!(nihon.capacity(), orig_capacity); } #[test] diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs index 53b0d0a271844..dd3a9f3584206 100644 --- a/library/alloc/tests/vec.rs +++ b/library/alloc/tests/vec.rs @@ -772,9 +772,23 @@ fn test_append() { #[test] fn test_split_off() { let mut vec = vec![1, 2, 
3, 4, 5, 6]; + let orig_capacity = vec.capacity(); let vec2 = vec.split_off(4); assert_eq!(vec, [1, 2, 3, 4]); assert_eq!(vec2, [5, 6]); + assert_eq!(vec.capacity(), orig_capacity); +} + +#[test] +fn test_split_off_take_all() { + let mut vec = vec![1, 2, 3, 4, 5, 6]; + let orig_ptr = vec.as_ptr(); + let orig_capacity = vec.capacity(); + let vec2 = vec.split_off(0); + assert_eq!(vec, []); + assert_eq!(vec2, [1, 2, 3, 4, 5, 6]); + assert_eq!(vec.capacity(), orig_capacity); + assert_eq!(vec2.as_ptr(), orig_ptr); } #[test]