From 3f30e41d7c1145081a5c73a90b927a10cbbd8312 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Fri, 4 Jul 2025 10:54:57 +0800 Subject: [PATCH] Benchmark: Add rich testing cases for sort string(utf8) --- arrow/benches/sort_kernel.rs | 50 ++++++++++++++++++++++++++++++++++++ arrow/src/util/bench_util.rs | 2 +- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/arrow/benches/sort_kernel.rs b/arrow/benches/sort_kernel.rs index 7262ba2ef9d2..8fcd8a570daf 100644 --- a/arrow/benches/sort_kernel.rs +++ b/arrow/benches/sort_kernel.rs @@ -103,6 +103,36 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_sort_to_indices(&arr, None)) }); + let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.0, 10); + c.bench_function("sort string[0-10] to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.5, 10); + c.bench_function("sort string[0-10] nulls to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.0, 100); + c.bench_function("sort string[0-100] to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.5, 100); + c.bench_function("sort string[0-100] nulls to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array::<i32>(2usize.pow(12), 0.0); + c.bench_function("sort string[0-400] to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array::<i32>(2usize.pow(12), 0.5); + c.bench_function("sort string[0-400] nulls to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 10); c.bench_function("sort string[10] to indices 2^12", |b| { b.iter(|| bench_sort_to_indices(&arr, None)) @@ -113,6 +143,26 @@ fn 
add_benchmark(c: &mut Criterion) { b.iter(|| bench_sort_to_indices(&arr, None)) }); + let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 100); + c.bench_function("sort string[100] to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.5, 100); + c.bench_function("sort string[100] nulls to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 1000); + c.bench_function("sort string[1000] to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + + let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.5, 1000); + c.bench_function("sort string[1000] nulls to indices 2^12", |b| { + b.iter(|| bench_sort_to_indices(&arr, None)) + }); + // This will generate string view arrays with 2^12 elements, each with a length fixed 10, and without nulls. let arr = create_string_view_array_with_fixed_len(2usize.pow(12), 0.0, 10); c.bench_function("sort string_view[10] to indices 2^12", |b| { diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index c7883ede7be3..521dc748777c 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -228,7 +228,7 @@ fn create_string_view_array_with_len_range_and_prefix( } /// Creates a random (but fixed-seeded) array of rand size with a given max size, null density and length -fn create_string_array_with_max_len( +pub fn create_string_array_with_max_len( size: usize, null_density: f32, max_str_len: usize,