Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add set_bits fuzz test #6394

Merged
merged 5 commits into from
Sep 20, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 215 additions & 105 deletions arrow-buffer/src/util/bit_mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,125 +64,235 @@ pub fn set_bits(
#[cfg(test)]
mod tests {
use super::*;
use crate::bit_util::unset_bit;
use rand::prelude::StdRng;
use rand::{Fill, Rng, SeedableRng};
use std::fmt::Display;

#[test]
fn test_set_bits_aligned() {
    // Byte-aligned case: 64 bits are copied from bit 0 of the source to bit 8
    // of the destination, so the source bytes land unchanged in bytes 1..=8.
    // The check is delegated to `SetBitsTest::verify`, which calls `set_bits`
    // and compares both the resulting buffer and the returned null count.
    SetBitsTest {
        write_data: vec![0; 10],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101,
        ],
        offset_write: 8,
        offset_read: 0,
        len: 64,
        expected_data: vec![
            0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
            0b11100111, 0b10100101, 0,
        ],
        expected_null_count: 24,
    }
    .verify();
}

#[test]
fn test_set_bits_unaligned_destination_start() {
    // The destination starts at bit 3 (not on a byte boundary) while the
    // source is read from bit 0; 64 bits are copied.
    // The check is delegated to `SetBitsTest::verify`, which calls `set_bits`
    // and compares both the resulting buffer and the returned null count.
    SetBitsTest {
        write_data: vec![0; 10],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101,
        ],
        offset_write: 3,
        offset_read: 0,
        len: 64,
        expected_data: vec![
            0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110,
            0b00101111, 0b00000101, 0b00000000,
        ],
        expected_null_count: 24,
    }
    .verify();
}

#[test]
fn test_set_bits_unaligned_destination_end() {
    // The destination starts on a byte boundary (bit 8) but only 62 bits are
    // copied, so the write ends in the middle of the last touched byte.
    // The check is delegated to `SetBitsTest::verify`, which calls `set_bits`
    // and compares both the resulting buffer and the returned null count.
    SetBitsTest {
        write_data: vec![0; 10],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101,
        ],
        offset_write: 8,
        offset_read: 0,
        len: 62,
        expected_data: vec![
            0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
            0b11100111, 0b00100101, 0,
        ],
        expected_null_count: 23,
    }
    .verify();
}

#[test]
fn test_set_bits_unaligned() {
    // Neither side is byte-aligned: 95 bits are read starting at source bit 5
    // and written starting at destination bit 3.
    // The check is delegated to `SetBitsTest::verify`, which calls `set_bits`
    // and compares both the resulting buffer and the returned null count.
    SetBitsTest {
        write_data: vec![0; 13],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101,
            0b10011001, 0b11011011, 0b11101011, 0b11000011,
        ],
        offset_write: 3,
        offset_read: 5,
        len: 95,
        expected_data: vec![
            0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001,
            0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001,
        ],
        expected_null_count: 35,
    }
    .verify();
}

#[test]
fn set_bits_fuz() {
    // Fuzz test: generate 10000 random scenarios and check each one against
    // the bit-by-bit expectation computed in `SetBitsTest::regen`.
    // The RNG is seeded with a constant so any failure is reproducible.
    let mut rng = StdRng::seed_from_u64(42);
    // A single instance is reused so its buffers are recycled across iterations.
    let mut data = SetBitsTest::new();
    for _ in 0..10000 {
        data.regen(&mut rng);
        data.verify();
    }
}

/// One `set_bits` scenario: the inputs handed to `set_bits` plus the outcome
/// that `verify` expects from it.
#[derive(Debug, Default)]
struct SetBitsTest {
/// target write data: initial contents of the destination buffer
/// (`verify` copies it before calling `set_bits`)
write_data: Vec<u8>,
/// source data the bits are read from
data: Vec<u8>,
/// offset, in bits, into `write_data` where writing starts
offset_write: usize,
/// offset, in bits, into `data` where reading starts
offset_read: usize,
/// number of bits to copy
len: usize,
/// the expected contents of write_data after the test
expected_data: Vec<u8>,
/// the expected number of nulls (unset bits) copied at the end of the test
expected_null_count: usize,
}

/// Renders a byte slice as space-separated 8-digit binary groups,
/// e.g. "01010101 10101010".
///
/// Every byte is followed by one space, and one more space is appended after
/// the last byte (so an empty slice renders as a single space).
struct BinaryFormatter<'a>(&'a [u8]);

impl Display for BinaryFormatter<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stop at the first formatting error, exactly like a `?` inside a loop.
        self.0
            .iter()
            .try_for_each(|byte| write!(f, "{:08b} ", byte))?;
        f.write_str(" ")
    }
}

/// Human-readable dump of a scenario, used in assertion failure messages.
/// The three buffers are printed in binary, the scalar fields in decimal.
impl Display for SetBitsTest {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure so that adding a field without printing it is a compile error.
        let Self {
            write_data,
            data,
            offset_write,
            offset_read,
            len,
            expected_data,
            expected_null_count,
        } = self;

        writeln!(f, "SetBitsTest {{")?;

        // buffers first, rendered bit by bit
        writeln!(f, " write_data: {}", BinaryFormatter(write_data))?;
        writeln!(f, " data: {}", BinaryFormatter(data))?;
        writeln!(f, " expected_data: {}", BinaryFormatter(expected_data))?;

        // then the scalar parameters
        writeln!(f, " offset_write: {}", offset_write)?;
        writeln!(f, " offset_read: {}", offset_read)?;
        writeln!(f, " len: {}", len)?;
        writeln!(f, " expected_null_count: {}", expected_null_count)?;

        writeln!(f, "}}")
    }
}

impl SetBitsTest {
/// create a new instance of FuzzData
fn new() -> Self {
Self::default()
}

/// Update this instance's fields with randomly selected values and expected data
fn regen(&mut self, rng: &mut StdRng) {
// (read) data
// ------------------+-----------------+-------
// .. offset_read .. | data | ...
// ------------------+-----------------+-------

// Write data
// -------------------+-----------------+-------
// .. offset_write .. | (data to write) | ...
// -------------------+-----------------+-------

// length of data to copy
let len = rng.gen_range(0..=200);

// randomly pick where we will write to
let offset_write_bits = rng.gen_range(0..=200);
let offset_write_bytes = if offset_write_bits % 8 == 0 {
offset_write_bits / 8
} else {
(offset_write_bits / 8) + 1
};
let extra_write_data_bytes = rng.gen_range(0..=5); // ensure 0 shows up often

// randomly decide where we will read from
let extra_read_data_bytes = rng.gen_range(0..=5); // make sure 0 shows up often
let offset_read_bits = rng.gen_range(0..=200);
let offset_read_bytes = if offset_read_bits % 8 != 0 {
(offset_read_bits / 8) + 1
} else {
offset_read_bits / 8
};

// create space for writing
self.write_data.clear();
self.write_data
.resize(offset_write_bytes + len + extra_write_data_bytes, 0);

// interestingly set_bits seems to assume the output is already zeroed
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was surprised at this -- if we fill the target with random data the tests fail (I am not sure why)

// the fuzz tests fail when this is uncommented
//self.write_data.try_fill(rng).unwrap();
self.offset_write = offset_write_bits;

// make source data
self.data
.resize(offset_read_bytes + len + extra_read_data_bytes, 0);
// fill source data with random bytes
self.data.try_fill(rng).unwrap();
self.offset_read = offset_read_bits;

self.len = len;

// generated expectated output (not efficient)
self.expected_data.resize(self.write_data.len(), 0);
self.expected_data.copy_from_slice(&self.write_data);

self.expected_null_count = 0;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fuzz tester runs this code to compute the expected output (using set_bit and get_bit)

for i in 0..self.len {
let bit = get_bit(&self.data, self.offset_read + i);
if bit {
set_bit(&mut self.expected_data, self.offset_write + i);
} else {
unset_bit(&mut self.expected_data, self.offset_write + i);
self.expected_null_count += 1;
}
}
}

/// call set_bits with the given parameters and compare with the expected output
fn verify(&self) {
// call set_bits and compare
let mut actual = self.write_data.to_vec();
let null_count = set_bits(
&mut actual,
&self.data,
self.offset_write,
self.offset_read,
self.len,
);

assert_eq!(actual, self.expected_data, "self: {}", self);
assert_eq!(null_count, self.expected_null_count, "self: {}", self);
}
}
}
Loading