From 0cb5a7580b04648def90e4929bde07eb61efccd8 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Fri, 9 Feb 2024 19:20:56 +0000 Subject: [PATCH] Improve performance of `String::push_str` --- benches/benches.rs | 26 +++++++++++++++++++++++++- src/collections/string.rs | 26 +++++++++++++++++++++++++- tests/all/string.rs | 17 +++++++++++++++++ 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/benches/benches.rs b/benches/benches.rs index 0fe806d..00bac02 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -89,6 +89,14 @@ fn format_realloc(bump: &bumpalo::Bump, n: usize) { criterion::black_box(s); } +#[cfg(feature = "collections")] +fn string_push_str(bump: &bumpalo::Bump, str: &str) { + let str = criterion::black_box(str); + let mut s = bumpalo::collections::string::String::with_capacity_in(str.len(), bump); + s.push_str(str); + criterion::black_box(s); +} + const ALLOCATIONS: usize = 10_000; fn bench_alloc(c: &mut Criterion) { @@ -202,6 +210,21 @@ fn bench_format_realloc(c: &mut Criterion) { } } +fn bench_string_push_str(c: &mut Criterion) { + let len: usize = 16 * 1024; // 16 KiB + + let mut group = c.benchmark_group("alloc"); + group.throughput(Throughput::Elements(len as u64)); + group.bench_function("push_str", |b| { + let mut bump = bumpalo::Bump::with_capacity(len); + let str = "x".repeat(len); + b.iter(|| { + bump.reset(); + string_push_str(&bump, &*str); + }); + }); +} + criterion_group!( benches, bench_alloc, @@ -212,6 +235,7 @@ criterion_group!( bench_try_alloc_with, bench_try_alloc_try_with, bench_try_alloc_try_with_err, - bench_format_realloc + bench_format_realloc, + bench_string_push_str ); criterion_main!(benches); diff --git a/src/collections/string.rs b/src/collections/string.rs index ffd1db9..8173eab 100644 --- a/src/collections/string.rs +++ b/src/collections/string.rs @@ -925,7 +925,31 @@ impl<'bump> String<'bump> { /// ``` #[inline] pub fn push_str(&mut self, string: &str) { - self.vec.extend_from_slice(string.as_bytes()) + // Reserve space in the Vec for the string to be added + let old_len = self.vec.len(); + self.vec.reserve(string.len()); + + let new_len = old_len + string.len(); + debug_assert!(new_len <= self.vec.capacity()); + + // Copy string into space just reserved + // SAFETY: + // * `src` is valid for reads of `string.len()` bytes by virtue of being an allocated `&str`. + // * `dst` is valid for writes of `string.len()` bytes as `self.vec.reserve(string.len())` + // above guarantees that. + // * Alignment is not relevant as `u8` has no alignment requirements. + // * Source and destination ranges cannot overlap as we just reserved the destination + // range from the bump. + unsafe { + let src = string.as_ptr(); + let dst = self.vec.as_mut_ptr().add(old_len); + ptr::copy_nonoverlapping(src, dst, string.len()); + } + + // Update length of Vec to include string just pushed + // SAFETY: We reserved sufficent capacity for the string above. + // The elements at `old_len..new_len` were initialized by `copy_nonoverlapping` above. + unsafe { self.vec.set_len(new_len) }; } /// Returns this `String`'s capacity, in bytes. diff --git a/tests/all/string.rs b/tests/all/string.rs index 5523751..526c109 100644 --- a/tests/all/string.rs +++ b/tests/all/string.rs @@ -17,3 +17,20 @@ fn trailing_comma_in_format_macro() { let v = format![in &b, "{}{}", 1, 2, ]; assert_eq!(v, "12"); } + +#[test] +fn push_str() { + let b = Bump::new(); + let mut s = String::new_in(&b); + s.push_str("abc"); + assert_eq!(s, "abc"); + s.push_str("def"); + assert_eq!(s, "abcdef"); + s.push_str(""); + assert_eq!(s, "abcdef"); + s.push_str(&"x".repeat(4000)); + assert_eq!(s.len(), 4006); + s.push_str("ghi"); + assert_eq!(s.len(), 4009); + assert_eq!(&s[s.len() - 5..], "xxghi"); +}