Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions python/python/benchmarks/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

@pytest.mark.parametrize(
"version",
["2.0", "2.1"],
ids=["2_0", "2_1"],
["2.0", "2.1", "2.2"],
ids=["2_0", "2_1", "2_2"],
)
@pytest.mark.benchmark(group="scan_single_column")
def test_scan_integer(tmp_path: Path, benchmark, version):
Expand Down Expand Up @@ -47,8 +47,8 @@ def read_all():

@pytest.mark.parametrize(
"version",
["2.0", "2.1"],
ids=["2_0", "2_1"],
["2.0", "2.1", "2.2"],
ids=["2_0", "2_1", "2_2"],
)
@pytest.mark.benchmark(group="scan_single_column")
def test_scan_nullable_integer(tmp_path: Path, benchmark, version):
Expand Down Expand Up @@ -133,8 +133,8 @@ def read_all():

@pytest.mark.parametrize(
"version",
["2.0", "2.1"],
ids=["2_0", "2_1"],
["2.0", "2.1", "2.2"],
ids=["2_0", "2_1", "2_2"],
)
@pytest.mark.benchmark(group="sample_single_column")
def test_sample_integer(tmp_path: Path, benchmark, version):
Expand Down
4 changes: 3 additions & 1 deletion python/python/benchmarks/test_take.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ def gen_ranges(total_rows, num_rows):
@pytest.mark.benchmark()
@pytest.mark.parametrize("file_size", [1024 * 1024], ids=["1MB"])
@pytest.mark.parametrize(
"lance_format_version", [("2.0", "V2_0"), ("2.1", "V2_1")], ids=["V2_0", "V2_1"]
"lance_format_version",
[("2.0", "V2_0"), ("2.1", "V2_1"), ("2.2", "V2_2")],
ids=["V2_0", "V2_1", "V2_2"],
)
@pytest.mark.parametrize("num_rows", [100, 1000], ids=["100rows", "1000rows"])
@pytest.mark.parametrize(
Expand Down
26 changes: 26 additions & 0 deletions python/python/tests/compat/test_file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,32 @@ def check_write(self):
writer.write_batch(build_basic_types())


# File format 2.2 is not in the stable 2.0.x line; gate this on the first
# available pre-release that includes 2.2 support.
@compat_test(min_version="4.0.0b1")
class BasicTypes2_2(UpgradeDowngradeTest):
    """Compatibility scenario for basic data types stored as file format 2.2."""

    def __init__(self, path: Path):
        # Location of the single file that every step below reads or writes.
        self.path = path

    def create(self):
        """Write the basic-types batch to ``self.path`` as a 2.2 file."""
        data = build_basic_types()
        target = str(self.path)
        with LanceFileWriter(target, schema=data.schema, version="2.2") as writer:
            writer.write_batch(data)

    def check_read(self):
        """Read the whole file back and compare it to the generated data."""
        # NOTE(review): presumably the compat harness decides which library
        # version executes this step -- confirm against `compat_test`.
        actual = LanceFileReader(str(self.path)).read_all().to_table()
        expected = build_basic_types()
        assert actual == expected

    def check_write(self):
        """Write the basic-types batch to ``self.path`` again as a 2.2 file."""
        target = str(self.path)
        # No schema is passed here, unlike in `create`.
        with LanceFileWriter(target, version="2.2") as writer:
            batch = build_basic_types()
            writer.write_batch(batch)


@compat_test(min_version="0.16.0")
@pytest.mark.parametrize(
"data_factory,name",
Expand Down
30 changes: 14 additions & 16 deletions python/python/tests/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_schema_only(tmp_path):
def test_write_with_max_page_bytes(tmp_path):
path = tmp_path / "foo.lance"
schema = pa.schema([pa.field("a", pa.int64())])
for version in ["2.0", "2.1"]:
for version in ["2.0", "2.1", "2.2"]:
with LanceFileWriter(
str(path), schema, max_page_bytes=1, version=version
) as writer:
Expand Down Expand Up @@ -91,23 +91,21 @@ def test_multiple_close(tmp_path):


def test_version(tmp_path):
path = tmp_path / "foo.lance"
schema = pa.schema([pa.field("a", pa.int64())])
cases = [
("foo.lance", "2.0", (0, 3)),
("foo2.lance", "2.1", (2, 1)),
("foo3.lance", "2.2", (2, 2)),
]

with LanceFileWriter(str(path), schema, version="2.0") as writer:
writer.write_batch(pa.table({"a": [1, 2, 3]}))
reader = LanceFileReader(str(path))
metadata = reader.metadata()
assert metadata.major_version == 0
assert metadata.minor_version == 3

path = tmp_path / "foo2.lance"
with LanceFileWriter(str(path), schema, version="2.1") as writer:
writer.write_batch(pa.table({"a": [1, 2, 3]}))
reader = LanceFileReader(str(path))
metadata = reader.metadata()
assert metadata.major_version == 2
assert metadata.minor_version == 1
for file_name, version, (major, minor) in cases:
path = tmp_path / file_name
with LanceFileWriter(str(path), schema, version=version) as writer:
writer.write_batch(pa.table({"a": [1, 2, 3]}))
reader = LanceFileReader(str(path))
metadata = reader.metadata()
assert metadata.major_version == major
assert metadata.minor_version == minor


def test_take(tmp_path):
Expand Down
6 changes: 5 additions & 1 deletion rust/lance-encoding/benches/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,11 @@ fn bench_decode_fsl(c: &mut Criterion) {
let rt = tokio::runtime::Runtime::new().unwrap();
let mut group = c.benchmark_group("decode_fsl");
const NUM_BYTES: u64 = 1024 * 1024 * 128;
for version in [LanceFileVersion::V2_0, LanceFileVersion::V2_1] {
for version in [
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
] {
for data_type in PRIMITIVE_TYPES_FOR_FSL {
for dimension in [4, 16, 32, 64, 128] {
let nullable_choices: &[bool] = if version == LanceFileVersion::V2_0 {
Expand Down
12 changes: 10 additions & 2 deletions rust/lance-file/benches/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ use std::collections::HashMap;
use tokio::runtime::Runtime;

fn bench_reader(c: &mut Criterion) {
for version in [LanceFileVersion::V2_0, LanceFileVersion::V2_1] {
for version in [
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
] {
let mut group = c.benchmark_group(format!("reader_{}", version));
let data = lance_datagen::gen_batch()
.anon_col(lance_datagen::array::rand_type(&DataType::Int32))
Expand Down Expand Up @@ -357,7 +361,11 @@ fn bench_random_access(c: &mut Criterion) {

let mut group = c.benchmark_group("take");

let versions = [LanceFileVersion::V2_0, LanceFileVersion::V2_1];
let versions = [
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
];

for filesystem in filesystems {
for version in versions {
Expand Down
6 changes: 4 additions & 2 deletions rust/lance-file/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1774,7 +1774,8 @@ pub mod tests {
#[rstest]
#[test_log::test(tokio::test)]
async fn test_projection(
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion,
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)]
version: LanceFileVersion,
) {
let fs = FsFixture::default();

Expand Down Expand Up @@ -2026,7 +2027,8 @@ pub mod tests {
#[rstest]
#[tokio::test]
async fn test_blocking_take(
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion,
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)]
version: LanceFileVersion,
) {
let fs = FsFixture::default();
let WrittenFile { data, schema, .. } = create_some_file(&fs, version).await;
Expand Down
6 changes: 5 additions & 1 deletion rust/lance/benches/vector_throughput.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,11 @@ fn bench_ivf_pq_throughput(c: &mut Criterion) {
let mut group = c.benchmark_group("ivf_pq_throughput");
group.throughput(Throughput::Elements(NUM_QUERIES as u64));

for &version in &[LanceFileVersion::V2_0, LanceFileVersion::V2_1] {
for &version in &[
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
] {
// Get or create cached dataset
let cached_dataset = get_or_create_dataset(&rt, version);

Expand Down
2 changes: 2 additions & 0 deletions rust/lance/src/dataset/fragment/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ mod tests {
#[values(
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
LanceFileVersion::Legacy,
LanceFileVersion::Stable
)]
Expand Down Expand Up @@ -570,6 +571,7 @@ mod tests {
#[values(
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
LanceFileVersion::Legacy,
LanceFileVersion::Stable
)]
Expand Down
1 change: 1 addition & 0 deletions rust/lance/src/dataset/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,7 @@ mod tests {
LanceFileVersion::Legacy,
LanceFileVersion::V2_0,
LanceFileVersion::V2_1,
LanceFileVersion::V2_2,
LanceFileVersion::Stable,
LanceFileVersion::Next,
];
Expand Down
4 changes: 2 additions & 2 deletions rust/lance/src/dataset/write/merge_insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5536,7 +5536,7 @@ MergeInsert: on=[id], when_matched=UpdateAll, when_not_matched=InsertAll, when_n
async fn test_duplicate_rowid_detection(
#[values(false, true)] is_full_schema: bool,
#[values(true, false)] enable_stable_row_ids: bool,
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)]
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)]
data_storage_version: LanceFileVersion,
) {
let test_uri = "memory://test_duplicate_rowid_multi_fragment.lance";
Expand Down Expand Up @@ -5604,7 +5604,7 @@ MergeInsert: on=[id], when_matched=UpdateAll, when_not_matched=InsertAll, when_n
async fn test_source_dedupe_behavior_first_seen(
#[values(false, true)] is_full_schema: bool,
#[values(true, false)] enable_stable_row_ids: bool,
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)]
#[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)]
data_storage_version: LanceFileVersion,
) {
let test_uri = format!(
Expand Down
Loading