From 12ae5dcf5c909c6aacd6d31fe4cf76c570f07ddb Mon Sep 17 00:00:00 2001 From: Mahmut Bulut Date: Wed, 11 Nov 2020 04:37:01 +0100 Subject: [PATCH] ARROW-10551 - Fix unreproducible benches --- rust/arrow/benches/aggregate_kernels.rs | 3 ++- rust/arrow/benches/arithmetic_kernels.rs | 3 ++- rust/arrow/benches/builder.rs | 5 +++-- rust/arrow/benches/equal.rs | 7 ++++--- rust/arrow/benches/sort_kernel.rs | 3 ++- rust/arrow/benches/take_kernels.rs | 5 +++-- rust/arrow/src/util/bit_util.rs | 13 +++++++------ rust/arrow/src/util/test_util.rs | 9 +++++++-- 8 files changed, 30 insertions(+), 18 deletions(-) diff --git a/rust/arrow/benches/aggregate_kernels.rs b/rust/arrow/benches/aggregate_kernels.rs index 049a2844dff..be150c57cef 100644 --- a/rust/arrow/benches/aggregate_kernels.rs +++ b/rust/arrow/benches/aggregate_kernels.rs @@ -26,10 +26,11 @@ extern crate arrow; use arrow::array::*; use arrow::compute::kernels::aggregate::*; +use arrow::util::test_util::seedable_rng; fn create_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching - let mut rng = rand::thread_rng(); + let mut rng = seedable_rng(); let mut builder = Float32Builder::new(size); for _ in 0..size { diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs index 93ad32bbfc0..b3272d12e46 100644 --- a/rust/arrow/benches/arithmetic_kernels.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -27,10 +27,11 @@ extern crate arrow; use arrow::array::*; use arrow::compute::kernels::arithmetic::*; use arrow::compute::kernels::limit::*; +use arrow::util::test_util::seedable_rng; fn create_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching - let mut rng = rand::thread_rng(); + let mut rng = seedable_rng(); let mut builder = Float32Builder::new(size); for _ in 0..size { diff --git a/rust/arrow/benches/builder.rs 
b/rust/arrow/benches/builder.rs index 3bb7f85ed6a..29edafafc8b 100644 --- a/rust/arrow/benches/builder.rs +++ b/rust/arrow/benches/builder.rs @@ -23,9 +23,10 @@ use std::mem::size_of; use criterion::*; use rand::distributions::Standard; -use rand::{thread_rng, Rng}; use arrow::array::*; +use arrow::util::test_util::seedable_rng; +use rand::Rng; // Build arrays with 512k elements. const BATCH_SIZE: usize = 8 << 10; @@ -51,7 +52,7 @@ fn bench_primitive(c: &mut Criterion) { } fn bench_bool(c: &mut Criterion) { - let data: Vec = thread_rng() + let data: Vec = seedable_rng() .sample_iter(&Standard) .take(BATCH_SIZE) .collect(); diff --git a/rust/arrow/benches/equal.rs b/rust/arrow/benches/equal.rs index a73b70e1011..9d9c68abbb6 100644 --- a/rust/arrow/benches/equal.rs +++ b/rust/arrow/benches/equal.rs @@ -26,17 +26,18 @@ use std::sync::Arc; extern crate arrow; use arrow::array::*; +use arrow::util::test_util::seedable_rng; fn create_string_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching - let mut rng = rand::thread_rng(); + let mut rng = seedable_rng(); let mut builder = StringBuilder::new(size); for _ in 0..size { if with_nulls && rng.gen::() > 0.5 { builder.append_null().unwrap(); } else { - let string = rand::thread_rng() + let string = seedable_rng() .sample_iter(&Alphanumeric) .take(10) .collect::(); @@ -48,7 +49,7 @@ fn create_string_array(size: usize, with_nulls: bool) -> ArrayRef { fn create_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching - let mut rng = rand::thread_rng(); + let mut rng = seedable_rng(); let mut builder = Float32Builder::new(size); for _ in 0..size { diff --git a/rust/arrow/benches/sort_kernel.rs b/rust/arrow/benches/sort_kernel.rs index 3d2ddb08760..cf0644d8d94 100644 --- a/rust/arrow/benches/sort_kernel.rs +++ b/rust/arrow/benches/sort_kernel.rs @@ -26,10 +26,11 @@ extern crate 
arrow; use arrow::array::*; use arrow::compute::kernels::sort::{lexsort, SortColumn}; +use arrow::util::test_util::seedable_rng; fn create_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching - let mut rng = rand::thread_rng(); + let mut rng = seedable_rng(); let mut builder = Float32Builder::new(size); for _ in 0..size { diff --git a/rust/arrow/benches/take_kernels.rs b/rust/arrow/benches/take_kernels.rs index 99e695f8020..e6ad05b7816 100644 --- a/rust/arrow/benches/take_kernels.rs +++ b/rust/arrow/benches/take_kernels.rs @@ -30,6 +30,7 @@ extern crate arrow; use arrow::array::*; use arrow::compute::{cast, take}; use arrow::datatypes::*; +use arrow::util::test_util::seedable_rng; // cast array from specified primitive array type to desired data type fn create_numeric(size: usize) -> ArrayRef @@ -44,7 +45,7 @@ where fn create_strings(size: usize) -> ArrayRef { let v = (0..size) .map(|_| { - rand::thread_rng() + seedable_rng() .sample_iter(&Alphanumeric) .take(5) .collect::() @@ -57,7 +58,7 @@ fn create_strings(size: usize) -> ArrayRef { } fn create_random_index(size: usize) -> UInt32Array { - let mut rng = rand::thread_rng(); + let mut rng = seedable_rng(); let ints = Int32Array::from(vec![rng.gen_range(-24i32, size as i32); size]); // cast to u32, conveniently marking negative values as nulls UInt32Array::from( diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs index 2c21d482a38..834c264d0e8 100644 --- a/rust/arrow/src/util/bit_util.rs +++ b/rust/arrow/src/util/bit_util.rs @@ -199,10 +199,11 @@ where #[cfg(test)] mod tests { - use rand::{thread_rng, Rng}; use std::collections::HashSet; use super::*; + use crate::util::test_util::seedable_rng; + use rand::Rng; #[test] fn test_round_upto_multiple_of_64() { @@ -246,7 +247,7 @@ mod tests { const NUM_BYTE: usize = 10; let mut buf = vec![0; NUM_BYTE]; let mut expected = vec![]; - let mut rng = thread_rng(); + let 
mut rng = seedable_rng(); for i in 0..8 * NUM_BYTE { let b = rng.gen_bool(0.5); expected.push(b); @@ -290,7 +291,7 @@ mod tests { const NUM_BYTE: usize = 10; let mut buf = vec![0; NUM_BYTE]; let mut expected = vec![]; - let mut rng = thread_rng(); + let mut rng = seedable_rng(); for i in 0..8 * NUM_BYTE { let b = rng.gen_bool(0.5); expected.push(b); @@ -314,7 +315,7 @@ mod tests { const NUM_BYTE: usize = 10; let mut buf = vec![255; NUM_BYTE]; let mut expected = vec![]; - let mut rng = thread_rng(); + let mut rng = seedable_rng(); for i in 0..8 * NUM_BYTE { let b = rng.gen_bool(0.5); expected.push(b); @@ -343,7 +344,7 @@ mod tests { let mut expected = Vec::with_capacity(NUM_BYTE * 8); expected.resize(NUM_BYTE * 8, false); - let mut rng = thread_rng(); + let mut rng = seedable_rng(); for _ in 0..NUM_BLOCKS { let start = rng.gen_range(0, NUM_BYTE * 8 - MAX_BLOCK_SIZE); @@ -371,7 +372,7 @@ mod tests { let mut buffer: [u8; NUM_BYTES * 8] = [0; NUM_BYTES * 8]; let mut v = HashSet::new(); - let mut rng = thread_rng(); + let mut rng = seedable_rng(); for _ in 0..NUM_SETS { let offset = rng.gen_range(0, 8 * NUM_BYTES); v.insert(offset); diff --git a/rust/arrow/src/util/test_util.rs b/rust/arrow/src/util/test_util.rs index 44f7074ee3a..6a70edda417 100644 --- a/rust/arrow/src/util/test_util.rs +++ b/rust/arrow/src/util/test_util.rs @@ -17,19 +17,24 @@ //! Utils to make testing easier -use rand::{thread_rng, Rng}; +use rand::{rngs::StdRng, Rng, SeedableRng}; use std::{env, fs, io::Write}; /// Returns a vector of size `n`, filled with randomly generated bytes. 
pub fn random_bytes(n: usize) -> Vec<u8> { let mut result = vec![]; - let mut rng = thread_rng(); + let mut rng = seedable_rng(); for _ in 0..n { result.push(rng.gen_range(0, 255)); } result } +/// Returns an RNG seeded with a fixed value (42), so generated data is reproducible across runs +pub fn seedable_rng() -> StdRng { + StdRng::seed_from_u64(42) +} + /// Returns file handle for a temp file in 'target' directory with a provided content /// /// TODO: Originates from `parquet` utils, can be merged in [ARROW-4064]