Skip to content
This repository was archived by the owner on Feb 18, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
67bb67a
Initial commit
jorgecarleitao Feb 6, 2021
97c6b12
Added buffer.
jorgecarleitao Feb 6, 2021
b11f692
Added Datatypes.
jorgecarleitao Feb 6, 2021
d4c2f44
Added primitive.
jorgecarleitao Feb 6, 2021
7949bfa
Added List.
jorgecarleitao Feb 6, 2021
3063fa9
Added Binary.
jorgecarleitao Feb 6, 2021
3720298
fixed binary.
jorgecarleitao Feb 6, 2021
db9a1d0
Dictionary.
jorgecarleitao Feb 6, 2021
54c5a2a
Slices.
jorgecarleitao Feb 6, 2021
b2de89c
Added equality
jorgecarleitao Feb 6, 2021
5bb6697
From vec.
jorgecarleitao Feb 6, 2021
f9b40f3
Added tests for equal
jorgecarleitao Feb 6, 2021
8184d2a
Fixed errors.
jorgecarleitao Feb 7, 2021
fff8bfd
Added take kernel and others.
jorgecarleitao Feb 7, 2021
2d35c99
String and boolean
jorgecarleitao Feb 7, 2021
54c57e9
refactor datatypes.
jorgecarleitao Feb 7, 2021
061e8ea
Added slice and value()
jorgecarleitao Feb 7, 2021
f2fc45c
Added struct.
jorgecarleitao Feb 7, 2021
6270738
Added FFI.
jorgecarleitao Feb 7, 2021
da5a325
Added Recordbatch
jorgecarleitao Feb 7, 2021
0e9c2e6
Moved stufff
jorgecarleitao Feb 7, 2021
7fd561d
Added dictionary and empty.
jorgecarleitao Feb 8, 2021
3f858f1
Added NullArray
jorgecarleitao Feb 8, 2021
2c515d2
Added FixedSizeList.
jorgecarleitao Feb 8, 2021
8e40f2b
Validated datatype of primitive.
jorgecarleitao Feb 8, 2021
5f5cdf2
Moved file.
jorgecarleitao Feb 8, 2021
3c26c6f
Moved content around.
jorgecarleitao Feb 8, 2021
a9e5c3f
Added falible version of from_trusted_len_iter.
jorgecarleitao Feb 8, 2021
e1a8494
Moved file.
jorgecarleitao Feb 8, 2021
b0ceab0
Moved.
jorgecarleitao Feb 8, 2021
0df7576
from bool for Bitmap
jorgecarleitao Feb 8, 2021
2221ba3
From trustedLEn for boolean.
jorgecarleitao Feb 8, 2021
0f2fd55
Simplify
jorgecarleitao Feb 8, 2021
71befc1
Added CSV reader.
jorgecarleitao Feb 8, 2021
44868b6
Added equal for boolean.
jorgecarleitao Feb 8, 2021
ff2b008
Simplified code.
jorgecarleitao Feb 8, 2021
d42bb3f
Minor simplification
jorgecarleitao Feb 8, 2021
faf5a77
Moved file.
jorgecarleitao Feb 8, 2021
4761e74
String stuff.
jorgecarleitao Feb 8, 2021
71f4b50
Added string equal
jorgecarleitao Feb 8, 2021
9cb2d21
Added equal to binary and decimal.
jorgecarleitao Feb 9, 2021
7e711f3
Added equal for null.
jorgecarleitao Feb 9, 2021
d5f9c80
Added FromIterator support for PrimitiveArray
jorgecarleitao Feb 9, 2021
77dc0bd
Moved.
jorgecarleitao Feb 9, 2021
45559e4
Added FromIterator for ListArray.
jorgecarleitao Feb 9, 2021
b2eaaa7
Added FromIterator for ListArray.
jorgecarleitao Feb 9, 2021
272f6a9
Added equality for list.
jorgecarleitao Feb 9, 2021
63c4c5d
Added arity kernel util.
jorgecarleitao Feb 9, 2021
f97900c
Moved file
jorgecarleitao Feb 9, 2021
c5392ea
Added iter for Binary.
jorgecarleitao Feb 9, 2021
d8205a3
Changed primitive signature.
jorgecarleitao Feb 9, 2021
7604213
Added clone of dyn Array
jorgecarleitao Feb 10, 2021
0ca8544
Added function to bitmap.
jorgecarleitao Feb 10, 2021
f57ee18
Change ListArray signature.
jorgecarleitao Feb 10, 2021
aedd2a0
Made take use Box.
jorgecarleitao Feb 10, 2021
962f071
Moved file.
jorgecarleitao Feb 10, 2021
ceee70c
Generalized code.
jorgecarleitao Feb 10, 2021
045c93f
Simplified dictionary.
jorgecarleitao Feb 10, 2021
fb4fc18
Improved Primitive<T>
jorgecarleitao Feb 10, 2021
c684772
Removed wrong mod
jorgecarleitao Feb 10, 2021
e397921
Added methods to build Dictionary.
jorgecarleitao Feb 10, 2021
d82fade
Added builder for Utf8
jorgecarleitao Feb 10, 2021
eb1a7a3
Fixed bug.
jorgecarleitao Feb 10, 2021
83a19e1
Added cast.
jorgecarleitao Feb 10, 2021
db86fb8
Improved creation.
jorgecarleitao Feb 11, 2021
671b017
Minor fixes.
jorgecarleitao Feb 11, 2021
57787a3
Added CSV writer.
jorgecarleitao Feb 11, 2021
abaf788
Fix spelling of 'immutable'
abreis Feb 9, 2021
636af5e
Added text
jorgecarleitao Feb 9, 2021
7857759
Fix spelling of 'immutable'
abreis Feb 9, 2021
06416ac
Tighten up the proposal
abreis Feb 9, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/target
Cargo.lock
32 changes: 32 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[package]
name = "arrow2"
version = "0.1.0"
authors = ["Jorge C. Leitao <jorgecarleitao@gmail.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
serde = { version = "1.0", features = ["rc"] }
serde_derive = "1.0"
serde_json = { version = "1.0", features = ["preserve_order"] }
rand = "0.7"
num = "0.3"
chrono = "0.4"

csv = { version = "1.1", optional = true }
regex = { version = "1.3", optional = true }
lazy_static = { version = "1.4", optional = true }
lexical-core = { version = "^0.7", optional = true }

[dev-dependencies]
criterion = "0.3"
tempfile = "3"

[features]
default = ["io_csv"]
io_csv = ["csv", "lazy_static", "lexical-core", "regex"]

[[bench]]
name = "take_kernels"
harness = false
217 changes: 216 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,219 @@
# Proposal: Safety by design in the Arrow crate

This document proposes a major redesign of the arrow crate to correctly handle
This document and repository proposes a major redesign of the arrow crate to correctly handle
memory safety, offsets and type safety.

TL;DR: this repo reproduces the main parts of the arrow crate with the proposed design in this repo. What it demonstrates:

1. allocations along cache lines, buffers and memory manangement
2. import and export using the FFI / C data interface
3. implementation of nested types (Dict, List, Struct)
4. `dyn Array` and dynamic typing
5. array equality
6. one kernel (`take`) for primitives (1.3x faster than current master).

Not demonstrated (but deemed feasible with the proposed design):

1. SIMD
2. IO (CSV / JSON)
3. `transform/` module (that would need to be migrated)

## Background

The arrow crate uses `Buffer`, a generic struct to store contiguous memory regions (of bytes). This construct is used to store data from all arrays in the Rust implementation. The simplest example is a buffer containing `1i32`, that is represented as `&[0u8, 0u8, 0u8, 1u8]` or `&[1u8, 0u8, 0u8, 0u8]` depending on endianness.

When a user wishes to read from a buffer, e.g. to perform a mathematical operation with its values, it needs to interpret the buffer in the target type. Because `Buffer` is a contiguous regions of bytes with no information about its underlying type, users must transmute its data into the respective type.

Arrow currently transmutes buffers on almost all operations, and very often does not verify that there is type alignment nor correct length when we transmute it to a slice of type `&[T]`.

Just as an example, the following code compiles, does not panic, and is unsound and results in UBs:

```rust
let buffer = Buffer::from(&[0i32, 2i32])
let data = ArrayData::new(DataType::Int64, 10, 0, None, 0, vec![buffer], vec![]);
let array = Float64Array::from(Arc::new(data));

println!("{:?}", array.value(1));
```

Note how this initializes a buffer with bytes from `i32`, initializes an `ArrayData` with dynamic type
`Int64`, and then a `Float64Array` from `Arc<ArrayData>`. `Float64Array`'s internals will essentially consume the pointer from the buffer, re-interpret it as `f64`, and offset it by `1`.

Still within this example, if we were to use `ArrayData`'s datatype, `Int64`, to transmute the buffer, we would be creating `&[i64]` out of a buffer created out of `i32`.

Any Rust developer acknowledges that this behavior goes very much against Rust's core premise that a function's behavior must not be undefined depending on whether the arguments are correct. The obvious observation is that transmute is one of the most `unsafe` Rust operations and not allowing the compiler to verify the necessary invariants is a large burden for users and developers to take.

This simple example indicates a broader problem with the current design, that we now explore in detail.

### Root cause analysis

At its core, Arrow's current design is centered around two main `structs`:

1. untyped `Buffer`
2. untyped `ArrayData`

#### 1. untyped `Buffer`

The crate's `Buffer` is untyped, which implies that once created, the type
information used to create it is lost. Consequently, the compiler has no way of verifying that a certain read can be performed. Thus, any read from it requires an alignment and size check at runtime. This is not only detrimental to performance, but also cumbersome.

Over the past 4 months, I have identified and fixed more than 10 instances of unsound code derived from the misuse, within the crate itself, of `Buffer`. This hints that there may be a design problem.

#### 2. untyped `ArrayData`

`ArrayData` is a `struct` containing buffers and child data that does not differentiate which type of array it represents at compile time.

Consequently, all buffer reads from `ArrayData`'s buffers are effectively `unsafe`, as they require certain invariants to hold. These invariants are strictly related to `ArrayData::datatype`: this `enum` differentiates how to transmute the `ArrayData::buffers`. For example, an `ArrayData::datatype` equal to `DataType::UInt32` implies that the buffer should be interpreted / transmuted as `u32`.

The challenge with the above struct is that it is not possible to prove that `ArrayData`'s creation and reads
are sound at compile time. As the sample above shows, there is nothing wrong, during compilation, with passing a buffer with `i32` to an `PrimitiveArray` expecting `i64` (via `ArrayData`). We could of course check it at runtime, and we should, but we are defeating the whole purpose of using a typed system as powerful as Rust offers.

The main consequence of this observation is that the current code has a significant maintenance cost, as we have to rigorously check the types of the buffers we are working with. The example above shows
that, even with that rigour, we fail to identify obvious problems at runtime.

Overall, there are many instances of our code where we expose public APIs marked as `safe` that are `unsafe` and lead to undefined behavior if used incorrectly. This goes against the core goals of the Rust language, and significantly weakens Arrow Rust's implementation core premise that the compiler and borrow checker proves many of the memory safety concerns that we may have.

Equally important, the inability of the compiler to prove certain invariants is detrimental to performance. As an example, the implementation of the `take` kernel in this repo is semantically equivalent to the current master, but 1.3x faster.

## Proposal

The proposal is to redesign the Arrow crate to address the design limitation described above.
This has a major impact into the whole ecosystem that relies on `Buffer`, `MutableBuffer`, `bytes`,
and has limited impact on high-end `Array` APIs that rely on iterators and other higher abstractions.

Broadly speaking, this proposes the following changes:

1. Replace `Buffer` by `Buffer<T>`
2. Replace `MutableBuffer` by `MutableBuffer<T>`
3. Replace `Bytes` by `Bytes<T>`
4. Remove `RawPointer`
5. Remove `ArrayData` and place its contents directly on the corresponding arrays
6. Make childs be `Arc<dyn Array>`
7. Remove `Array::data` and `Array::data_ref`
8. Redesign `bitmap` to hold offsets
9. Replace `Array::slice` by concrete implementations
10. Make `PrimitiveArray<NativeType>` instead of `PrimitiveType`

### 1-4. Replace `Buffer` by `Buffer<T>`

This is one of the core changes and is a major design change: `Buffer`s must be typed. There will be
an `unsafe` trait, `NativeType`, implemented for `u8, u16, u32, u64, i8, i16, i32, i64, f32, f64` corresponding to the only types that can be represented in a buffer.

Create a generic `Buffer<T: NativeType>`, `Bytes<T: NativeType>`, `MutableBuffer<T: NativeType>`, that corresponds to a byte-aligned, cache line-aligned contiguous memory regions.

This allow us to only have to deal with `transmute` at FFI boundaries. Effectively, it allow us to not
have to rely on the highly `unsafe` `RawPointer` on array implementations, as well as `as_typed` function that transmutes buffers.

[Here](src/buffer/immutable.rs) you can find the concrete implementation proposed in this repo.

### 5. Remove `ArrayData` and place its contents directly on the corresponding arrays

For example, for primitive types, such as `Float64` and `Date32`, declare a `PrimitiveArray<T>` as follows:

```rust
#[derive(Debug, Clone)]
pub struct PrimitiveArray<T: NativeType> {
data_type: DataType,
values: Buffer<T>,
validity: Option<Bitmap>,
offset: usize,
}
```

Note how `T` denotes the _physical_ representation, while `data_type` corresponds to the _logical_ representation. This is so that `Timestamp` with timezones becomes a first-class citizen (it currently isn't).

### 6. Child data is stored as `Arc<dyn Array>`

For example, the struct holding a `ListArray` is [defined](src/array/list.rs) as

```rust
#[derive(Debug, Clone)]
pub struct ListArray<O: Offset> {
data_type: DataType,
offsets: Buffer<O>,
values: Arc<dyn Array>,
validity: Option<Bitmap>,
offset: usize,
}
```

This greatly simplifies creating nested structures, as there is no longer any `ArrayData`.

Accessing individual (nested) values of this array, e.g. for iterations, works as before:

```rust
impl<O: Offset> ListArray<O> {
pub fn value(&self, i: usize) -> Box<dyn Array> {
let offsets = self.offsets.as_slice();
let offset = offsets[i];
let offset_1 = offsets[i + 1];
let length = (offset_1 - offset).to_usize().unwrap();

self.values.slice(offset.to_usize().unwrap(), length)
}
}
```

Note the usage of `Array::slice`, an abstract method that each specific implementation must know how to perform. This method has been problematic in the past because its implementation is type-specific, but
the current implementation is type-agnostic (i.e. a bug).

In the case of a list array:

```rust
impl<O: Offset> ListArray<O> {
pub fn slice(&self, offset: usize, length: usize) -> Self {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this API is very cool and very Rust like :thu

let validity = self.validity.as_ref().map(|x| x.slice(offset, length));
Self {
data_type: self.data_type.clone(),
offsets: self.offsets.slice(offset, length),
values: self.values.clone(),
validity,
offset,
}
}
}

impl<O: Offset> Array for ListArray<O> {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
}
```

Note how the `offsets` were sliced, but the `values` were not. In the current master, both get sliced, which
is semantically incorrect.

Also note that the choice of `Arc` over `Box` is solely for the purposes of enabling a cheap `Clone`.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this comment -- the example shows the use of a Box but the comment suggest that Arc is preferred to allow cheap clones

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right: the example is wrong: it is supposed to show an Arc.


### 7. Remove `Array::data` and `Array::data_ref`

Without `ArrayData`, these methods are no longer required. Required traits to enable FFI are instead
provided. This repo supports FFI (import and export), which demonstrates that `ArrayData` is not needed.

### 8. Redesign bitmap

This implementation redesigns `Bitmap` to allow it to hold `Bytes<u8>` and an offset in `bits`.
`Bitmap` is the only struct that holds bitmaps, and has methods to efficiently `get` bits.
Because it has an offset in bits, it contains all information required to correctly offset itself.

This way, users no longer have to use `MutableBuffer<u8>` to handle `bitmaps`, use `unsafe` `get_bit_raw`,
offsetting in bits vs bytes, etc.

### 9. Replace `Array::slice` by concrete implementations

Slice is an operation whose implementation depends on the particular logical type being implemented.
This proposes that we move `slice` to be a type-specific implementation.

### 10. Make `PrimitiveArray<T: NativeType>` instead of `PrimitiveType`

Currently, `PrimitiveArray` depends on a `ArrowPrimitiveType`, which has an associated `DataType`.
This makes it difficult to distinguish the physical representation from its logical one. I.e. `Int64Type` is both
a physical (`i64`) and logical type (`DataType::Int64`). There are logical types whose physical representation
is the same (e.g. `Timestamp(_, _)`). Hard-coding the logical representation in the type takes away this fundamental
separation.

This proposal separates the two aspects: the generic argument, `T`, is used to declare the physical layout, which, within Rust, is used for type-safety.
The `DataType` is used for a logical representation which, in the context of Rust, is used for dynamic typing, i.e. it enables the trait Object `Array` to implement `as_any()` and use `Array::data_type()` to decide to which concrete
implementation `&dyn Array` should be `downcast_ref`ed to.

With this design, an incorrect `DataType` only causes `downcast_ref` to fail and cannot cause undefined behavior. The only possible undefined behavior in this new design is at FFI boundaries: a byte buffer that is incorrect for a `DataType` causes the library to interpret bytes of type `x` as type `y`, which is undefined behavior.
88 changes: 88 additions & 0 deletions benches/take_kernels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;
use criterion::Criterion;

use rand::distributions::{Distribution, Standard};
use rand::{rngs::StdRng, Rng, SeedableRng};

use arrow2::{array::*, datatypes::DataType, datatypes::PrimitiveType};
use arrow2::{compute::take, datatypes::Int32Type};

/// Returns fixed seedable RNG
pub fn seedable_rng() -> StdRng {
StdRng::seed_from_u64(42)
}

// cast array from specified primitive array type to desired data type
fn create_primitive<T>(size: usize) -> PrimitiveArray<T::Native>
where
T: PrimitiveType,
Standard: Distribution<T::Native>,
{
seedable_rng()
.sample_iter(&Standard)
.take(size)
.map(Some)
.collect::<Primitive<T::Native>>()
.to(T::DATA_TYPE)
}

fn create_random_index(size: usize, null_density: f32) -> PrimitiveArray<i32> {
let mut rng = seedable_rng();
(0..size)
.map(|_| {
if rng.gen::<f32>() > null_density {
let value = rng.gen_range::<i32, _, _>(0i32, size as i32);
Some(value)
} else {
None
}
})
.collect::<Primitive<i32>>()
.to(DataType::Int32)
}

fn bench_take(values: &dyn Array, indices: &PrimitiveArray<i32>) {
criterion::black_box(take::take(values, &indices, None).unwrap());
}

fn add_benchmark(c: &mut Criterion) {
let values = create_primitive::<Int32Type>(512);
let indices = create_random_index(512, 0.0);
c.bench_function("take i32 512", |b| b.iter(|| bench_take(&values, &indices)));
let values = create_primitive::<Int32Type>(1024);
let indices = create_random_index(1024, 0.0);
c.bench_function("take i32 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});

let indices = create_random_index(512, 0.5);
c.bench_function("take i32 nulls 512", |b| {
b.iter(|| bench_take(&values, &indices))
});
let values = create_primitive::<Int32Type>(1024);
let indices = create_random_index(1024, 0.5);
c.bench_function("take i32 nulls 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
Loading