From a643da81f3009c273c1e4dcc77cb1ef33ddb47bd Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Sat, 11 Oct 2025 11:56:46 +0200 Subject: [PATCH] Add RecordBatch::project microbenchmark --- arrow-array/Cargo.toml | 4 ++ arrow-array/benches/record_batch.rs | 92 +++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 arrow-array/benches/record_batch.rs diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index e2cfd14e098b..3308fe7f2dea 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -80,3 +80,7 @@ harness = false [[bench]] name = "union_array" harness = false + +[[bench]] +name = "record_batch" +harness = false \ No newline at end of file diff --git a/arrow-array/benches/record_batch.rs b/arrow-array/benches/record_batch.rs new file mode 100644 index 000000000000..ec53ae413b3d --- /dev/null +++ b/arrow-array/benches/record_batch.rs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{ArrayRef, Int64Array, RecordBatch, RecordBatchOptions}; +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use criterion::*; +use num_integer::Integer; +use std::sync::Arc; + +fn make_record_batch(column_count: usize, row_count: usize) -> RecordBatch { + let fields = (0..column_count) + .map(|i| Field::new(format!("col_{}", i), DataType::Int64, i.is_even())) + .collect::>(); + + let columns = fields + .iter() + .map(|_| { + let array_ref: ArrayRef = Arc::new(Int64Array::from_value(0, row_count)); + array_ref + }) + .collect::>(); + + let schema = Schema::new(fields); + + let mut options = RecordBatchOptions::new(); + options.row_count = Some(row_count); + + RecordBatch::try_new_with_options(SchemaRef::new(schema), columns, &options).unwrap() +} + +fn project_benchmark( + c: &mut Criterion, + column_count: usize, + row_count: usize, + projection_size: usize, +) { + let input = make_input(column_count, row_count, projection_size); + + c.bench_with_input( + BenchmarkId::new( + "project", + format!( + "{:?}x{:?} -> {:?}x{:?}", + input.0.num_columns(), + input.0.num_rows(), + input.1.len(), + input.0.num_rows() + ), + ), + &input, + |b, (rb, projection)| { + b.iter(|| black_box(rb.project(projection).unwrap())); + }, + ); +} + +fn make_input( + column_count: usize, + row_count: usize, + projection_size: usize, +) -> (RecordBatch, Vec) { + let rb = make_record_batch(column_count, row_count); + let projection = (0..projection_size).collect::>(); + (rb, projection) +} + +fn criterion_benchmark(c: &mut Criterion) { + [10, 100, 1000].iter().for_each(|&column_count| { + [1, column_count / 2, column_count - 1] + .iter() + .for_each(|&projection_size| { + project_benchmark(c, column_count, 8192, projection_size); + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches);