Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[blocked] Switch to Utf8View for TPC-H #476

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ serde = "1.0.197"
serde_json = "1.0.116"
serde_test = "1.0.176"
simplelog = { version = "0.12.2", features = ["paris"] }
static_assertions = "1.1.0"
thiserror = "1.0.58"
tokio = "1.37.0"
uninit = "0.6.2"
Expand Down
1 change: 1 addition & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ vortex-scalar = { path = "../vortex-scalar", features = [
"serde",
] }
serde = { workspace = true, features = ["derive"] }
static_assertions = { workspace = true }

[target.'cfg(target_arch = "wasm32")'.dependencies]
# Enable the JS feature of getrandom (via rand) to supprt wasm32 target
Expand Down
16 changes: 8 additions & 8 deletions vortex-array/src/array/chunked/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,33 +196,32 @@ fn pack_primitives(
///
/// It is expected this function is only called from [try_canonicalize_chunks], and thus all chunks have
/// been checked to have the same DType already.
#[allow(unused)]
fn pack_views(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fwiw you might find it easier to use the arrow view array builder to avoid alignment issues

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

until we have our own bytes type this is pretty fiddly/errorprone

chunks: &[Array],
dtype: &DType,
nullability: Nullability,
) -> VortexResult<VarBinViewArray> {
let length: usize = chunks.iter().map(|c| c.len()).sum();
let validity = validity_from_chunks(chunks, nullability);
let mut views = Vec::new();
let mut buffers = Vec::new();
for chunk in chunks {
// Each chunk's views have block IDs that are zero-referenced.
// Each chunk's views have buffer IDs that are zero-referenced.
// As part of the packing operation, we need to rewrite them to be referenced to the global
// merged blocks list.
// merged buffers list.
let buffers_offset = buffers.len();
let canonical_chunk = chunk.clone().into_varbinview().unwrap();

for block in canonical_chunk.buffers() {
let canonical_buffer = block.into_canonical().unwrap().into_array();
for buffer in canonical_chunk.buffers() {
let canonical_buffer = buffer.into_canonical().unwrap().into_array();
buffers.push(canonical_buffer);
}

// Write a new primitive array with all of the view outputs.
for view in canonical_chunk.view_slice() {
if view.is_inlined() {
// Inlined views can be copied directly into the output
views.push(*view);
} else {
// Referencing views must have their buffer_index adjusted with new offsets
let view_ref = view.as_view();
views.push(BinaryView::new_view(
view.len(),
Expand All @@ -235,7 +234,8 @@ fn pack_views(
}

let (view_ptr, view_len, view_cap) = views.into_raw_parts();
// Transmute the pointer to be to a set of u128, which is of identical size.
// Transmute the pointer to target type u128, which is of identical size and is
// an Arrow native type.
let views_u128 = unsafe {
Vec::from_raw_parts(
std::mem::transmute::<*mut BinaryView, *mut u128>(view_ptr),
Expand Down
8 changes: 6 additions & 2 deletions vortex-array/src/array/varbinview/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::array::varbin::varbin_scalar;
use crate::array::varbinview::{VarBinViewArray, VIEW_SIZE};
use crate::array::varbinview::{VarBinViewArray, VIEW_SIZE_BYTES};
use crate::arrow::FromArrowArray;
use crate::compute::unary::ScalarAtFn;
use crate::compute::{slice, ArrayCompute, SliceFn, TakeFn};
Expand Down Expand Up @@ -38,7 +38,11 @@ impl ScalarAtFn for VarBinViewArray {
impl SliceFn for VarBinViewArray {
fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> {
Ok(Self::try_new(
slice(&self.views(), start * VIEW_SIZE, stop * VIEW_SIZE)?,
slice(
&self.views(),
start * VIEW_SIZE_BYTES,
stop * VIEW_SIZE_BYTES,
)?,
(0..self.metadata().buffer_lens.len())
.map(|i| self.buffer(i))
.collect::<Vec<_>>(),
Expand Down
19 changes: 13 additions & 6 deletions vortex-array/src/array/varbinview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use arrow_array::builder::{BinaryViewBuilder, StringViewBuilder};
use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray};
use arrow_buffer::ScalarBuffer;
use itertools::Itertools;
use static_assertions::{assert_eq_align, assert_eq_size};
use vortex_dtype::{DType, PType};
use vortex_error::{vortex_bail, VortexResult};

Expand Down Expand Up @@ -96,6 +97,12 @@ pub union BinaryView {
_ref: Ref,
}

/// BinaryView must be 16 bytes and have 8 byte alignment
assert_eq_size!(BinaryView, [u8; 16]);
assert_eq_size!(Inlined, [u8; 16]);
assert_eq_size!(Ref, [u8; 16]);
assert_eq_align!(BinaryView, u64);

impl BinaryView {
pub const MAX_INLINED_SIZE: usize = 12;

Expand Down Expand Up @@ -159,7 +166,7 @@ impl Debug for BinaryView {
}

// reminder: views are 16 bytes with 8-byte alignment
pub(crate) const VIEW_SIZE: usize = size_of::<BinaryView>();
pub(crate) const VIEW_SIZE_BYTES: usize = size_of::<BinaryView>();

impl_encoding!("vortex.varbinview", 5u16, VarBinView);

Expand Down Expand Up @@ -218,7 +225,7 @@ impl VarBinViewArray {
vortex_bail!("incorrect validity {:?}", validity);
}

let num_views = views.len() / VIEW_SIZE;
let num_views = views.len() / VIEW_SIZE_BYTES;
let metadata = VarBinViewMetadata {
validity: validity.to_metadata(num_views)?,
buffer_lens: buffers.iter().map(|a| a.len()).collect_vec(),
Expand Down Expand Up @@ -250,7 +257,7 @@ impl VarBinViewArray {
.expect("Views must be a primitive array")
.maybe_null_slice::<u8>()
.as_ptr() as _,
self.views().len() / VIEW_SIZE,
self.views().len() / VIEW_SIZE_BYTES,
)
}
}
Expand All @@ -267,7 +274,7 @@ impl VarBinViewArray {
#[inline]
pub fn views(&self) -> Array {
self.array()
.child(0, &DType::BYTES, self.len() * VIEW_SIZE)
.child(0, &DType::BYTES, self.len() * VIEW_SIZE_BYTES)
.unwrap()
}

Expand Down Expand Up @@ -481,7 +488,7 @@ impl<'a> FromIterator<Option<&'a str>> for VarBinViewArray {
mod test {
use vortex_scalar::Scalar;

use crate::array::varbinview::{BinaryView, VarBinViewArray, VIEW_SIZE};
use crate::array::varbinview::{BinaryView, VarBinViewArray, VIEW_SIZE_BYTES};
use crate::compute::slice;
use crate::compute::unary::scalar_at;
use crate::{Canonical, IntoArray, IntoCanonical};
Expand Down Expand Up @@ -530,7 +537,7 @@ mod test {

#[test]
pub fn binary_view_size_and_alignment() {
assert_eq!(size_of::<BinaryView>(), VIEW_SIZE);
assert_eq!(size_of::<BinaryView>(), VIEW_SIZE_BYTES);
assert_eq!(size_of::<BinaryView>(), 16);
assert_eq!(align_of::<BinaryView>(), 8);
}
Expand Down
Loading