diff --git a/python/python/benchmarks/test_file.py b/python/python/benchmarks/test_file.py index 16d63c84a60..a47e925cf33 100644 --- a/python/python/benchmarks/test_file.py +++ b/python/python/benchmarks/test_file.py @@ -12,8 +12,8 @@ @pytest.mark.parametrize( "version", - ["2.0", "2.1"], - ids=["2_0", "2_1"], + ["2.0", "2.1", "2.2"], + ids=["2_0", "2_1", "2_2"], ) @pytest.mark.benchmark(group="scan_single_column") def test_scan_integer(tmp_path: Path, benchmark, version): @@ -47,8 +47,8 @@ def read_all(): @pytest.mark.parametrize( "version", - ["2.0", "2.1"], - ids=["2_0", "2_1"], + ["2.0", "2.1", "2.2"], + ids=["2_0", "2_1", "2_2"], ) @pytest.mark.benchmark(group="scan_single_column") def test_scan_nullable_integer(tmp_path: Path, benchmark, version): @@ -133,8 +133,8 @@ def read_all(): @pytest.mark.parametrize( "version", - ["2.0", "2.1"], - ids=["2_0", "2_1"], + ["2.0", "2.1", "2.2"], + ids=["2_0", "2_1", "2_2"], ) @pytest.mark.benchmark(group="sample_single_column") def test_sample_integer(tmp_path: Path, benchmark, version): diff --git a/python/python/benchmarks/test_take.py b/python/python/benchmarks/test_take.py index e7c8a2a46a9..68bd91b3681 100644 --- a/python/python/benchmarks/test_take.py +++ b/python/python/benchmarks/test_take.py @@ -127,7 +127,9 @@ def gen_ranges(total_rows, num_rows): @pytest.mark.benchmark() @pytest.mark.parametrize("file_size", [1024 * 1024], ids=["1MB"]) @pytest.mark.parametrize( - "lance_format_version", [("2.0", "V2_0"), ("2.1", "V2_1")], ids=["V2_0", "V2_1"] + "lance_format_version", + [("2.0", "V2_0"), ("2.1", "V2_1"), ("2.2", "V2_2")], + ids=["V2_0", "V2_1", "V2_2"], ) @pytest.mark.parametrize("num_rows", [100, 1000], ids=["100rows", "1000rows"]) @pytest.mark.parametrize( diff --git a/python/python/tests/compat/test_file_formats.py b/python/python/tests/compat/test_file_formats.py index f65c8611ff6..9ceafa81a49 100644 --- a/python/python/tests/compat/test_file_formats.py +++ b/python/python/tests/compat/test_file_formats.py @@ -48,6 +48,32 @@ def check_write(self): writer.write_batch(build_basic_types()) +# File format 2.2 is not in the stable 2.0.x line; gate this on the first +# available pre-release that includes 2.2 support. +@compat_test(min_version="4.0.0b1") +class BasicTypes2_2(UpgradeDowngradeTest): + """Test file format 2.2 compatibility with basic data types.""" + + def __init__(self, path: Path): + self.path = path + + def create(self): + batch = build_basic_types() + with LanceFileWriter( + str(self.path), version="2.2", schema=batch.schema + ) as writer: + writer.write_batch(batch) + + def check_read(self): + reader = LanceFileReader(str(self.path)) + table = reader.read_all().to_table() + assert table == build_basic_types() + + def check_write(self): + with LanceFileWriter(str(self.path), version="2.2") as writer: + writer.write_batch(build_basic_types()) + + @compat_test(min_version="0.16.0") @pytest.mark.parametrize( "data_factory,name", diff --git a/python/python/tests/test_file.py b/python/python/tests/test_file.py index 5ac5886e2e6..f3b3d40729e 100644 --- a/python/python/tests/test_file.py +++ b/python/python/tests/test_file.py @@ -61,7 +61,7 @@ def test_schema_only(tmp_path): def test_write_with_max_page_bytes(tmp_path): path = tmp_path / "foo.lance" schema = pa.schema([pa.field("a", pa.int64())]) - for version in ["2.0", "2.1"]: + for version in ["2.0", "2.1", "2.2"]: with LanceFileWriter( str(path), schema, max_page_bytes=1, version=version ) as writer: @@ -91,23 +91,21 @@ def test_multiple_close(tmp_path): def test_version(tmp_path): - path = tmp_path / "foo.lance" schema = pa.schema([pa.field("a", pa.int64())]) + cases = [ + ("foo.lance", "2.0", (0, 3)), + ("foo2.lance", "2.1", (2, 1)), + ("foo3.lance", "2.2", (2, 2)), + ] - with LanceFileWriter(str(path), schema, version="2.0") as writer: - writer.write_batch(pa.table({"a": [1, 2, 3]})) - reader = LanceFileReader(str(path)) - metadata = reader.metadata() - assert metadata.major_version == 0 - assert metadata.minor_version == 3 - - path = tmp_path / "foo2.lance" - with LanceFileWriter(str(path), schema, version="2.1") as writer: - writer.write_batch(pa.table({"a": [1, 2, 3]})) - reader = LanceFileReader(str(path)) - metadata = reader.metadata() - assert metadata.major_version == 2 - assert metadata.minor_version == 1 + for file_name, version, (major, minor) in cases: + path = tmp_path / file_name + with LanceFileWriter(str(path), schema, version=version) as writer: + writer.write_batch(pa.table({"a": [1, 2, 3]})) + reader = LanceFileReader(str(path)) + metadata = reader.metadata() + assert metadata.major_version == major + assert metadata.minor_version == minor def test_take(tmp_path): diff --git a/rust/lance-encoding/benches/decoder.rs b/rust/lance-encoding/benches/decoder.rs index 1b64f8fe711..9cf696bce36 100644 --- a/rust/lance-encoding/benches/decoder.rs +++ b/rust/lance-encoding/benches/decoder.rs @@ -95,7 +95,11 @@ fn bench_decode_fsl(c: &mut Criterion) { let rt = tokio::runtime::Runtime::new().unwrap(); let mut group = c.benchmark_group("decode_fsl"); const NUM_BYTES: u64 = 1024 * 1024 * 128; - for version in [LanceFileVersion::V2_0, LanceFileVersion::V2_1] { + for version in [ + LanceFileVersion::V2_0, + LanceFileVersion::V2_1, + LanceFileVersion::V2_2, + ] { for data_type in PRIMITIVE_TYPES_FOR_FSL { for dimension in [4, 16, 32, 64, 128] { let nullable_choices: &[bool] = if version == LanceFileVersion::V2_0 { diff --git a/rust/lance-file/benches/reader.rs b/rust/lance-file/benches/reader.rs index a00af5015fa..a9f849d88a0 100644 --- a/rust/lance-file/benches/reader.rs +++ b/rust/lance-file/benches/reader.rs @@ -25,7 +25,11 @@ use std::collections::HashMap; use tokio::runtime::Runtime; fn bench_reader(c: &mut Criterion) { - for version in [LanceFileVersion::V2_0, LanceFileVersion::V2_1] { + for version in [ + LanceFileVersion::V2_0, + LanceFileVersion::V2_1, + LanceFileVersion::V2_2, + ] { let mut group = c.benchmark_group(format!("reader_{}", version)); let data = lance_datagen::gen_batch() .anon_col(lance_datagen::array::rand_type(&DataType::Int32)) @@ -357,7 +361,11 @@ fn bench_random_access(c: &mut Criterion) { let mut group = c.benchmark_group("take"); - let versions = [LanceFileVersion::V2_0, LanceFileVersion::V2_1]; + let versions = [ + LanceFileVersion::V2_0, + LanceFileVersion::V2_1, + LanceFileVersion::V2_2, + ]; for filesystem in filesystems { for version in versions { diff --git a/rust/lance-file/src/reader.rs b/rust/lance-file/src/reader.rs index 4c48edf5e9e..982c2602d7a 100644 --- a/rust/lance-file/src/reader.rs +++ b/rust/lance-file/src/reader.rs @@ -1774,7 +1774,8 @@ pub mod tests { #[rstest] #[test_log::test(tokio::test)] async fn test_projection( - #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion, + #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)] + version: LanceFileVersion, ) { let fs = FsFixture::default(); @@ -2026,7 +2027,8 @@ pub mod tests { #[rstest] #[tokio::test] async fn test_blocking_take( - #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion, + #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)] + version: LanceFileVersion, ) { let fs = FsFixture::default(); let WrittenFile { data, schema, .. } = create_some_file(&fs, version).await; diff --git a/rust/lance/benches/vector_throughput.rs b/rust/lance/benches/vector_throughput.rs index aa557863d2b..e7d14232a46 100644 --- a/rust/lance/benches/vector_throughput.rs +++ b/rust/lance/benches/vector_throughput.rs @@ -287,7 +287,11 @@ fn bench_ivf_pq_throughput(c: &mut Criterion) { let mut group = c.benchmark_group("ivf_pq_throughput"); group.throughput(Throughput::Elements(NUM_QUERIES as u64)); - for &version in &[LanceFileVersion::V2_0, LanceFileVersion::V2_1] { + for &version in &[ + LanceFileVersion::V2_0, + LanceFileVersion::V2_1, + LanceFileVersion::V2_2, + ] { // Get or create cached dataset let cached_dataset = get_or_create_dataset(&rt, version); diff --git a/rust/lance/src/dataset/fragment/write.rs b/rust/lance/src/dataset/fragment/write.rs index bc8f78871b4..afc950f7fae 100644 --- a/rust/lance/src/dataset/fragment/write.rs +++ b/rust/lance/src/dataset/fragment/write.rs @@ -539,6 +539,7 @@ mod tests { #[values( LanceFileVersion::V2_0, LanceFileVersion::V2_1, + LanceFileVersion::V2_2, LanceFileVersion::Legacy, LanceFileVersion::Stable )] @@ -570,6 +571,7 @@ mod tests { #[values( LanceFileVersion::V2_0, LanceFileVersion::V2_1, + LanceFileVersion::V2_2, LanceFileVersion::Legacy, LanceFileVersion::Stable )] diff --git a/rust/lance/src/dataset/write.rs b/rust/lance/src/dataset/write.rs index 942848375f0..f5c4f97d45e 100644 --- a/rust/lance/src/dataset/write.rs +++ b/rust/lance/src/dataset/write.rs @@ -1391,6 +1391,7 @@ mod tests { LanceFileVersion::Legacy, LanceFileVersion::V2_0, LanceFileVersion::V2_1, + LanceFileVersion::V2_2, LanceFileVersion::Stable, LanceFileVersion::Next, ]; diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 11a69f69da8..478b20971fd 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -5536,7 +5536,7 @@ MergeInsert: on=[id], when_matched=UpdateAll, when_not_matched=InsertAll, when_n async fn test_duplicate_rowid_detection( #[values(false, true)] is_full_schema: bool, #[values(true, false)] enable_stable_row_ids: bool, - #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] + #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)] data_storage_version: LanceFileVersion, ) { let test_uri = "memory://test_duplicate_rowid_multi_fragment.lance"; @@ -5604,7 +5604,7 @@ MergeInsert: on=[id], when_matched=UpdateAll, when_not_matched=InsertAll, when_n async fn test_source_dedupe_behavior_first_seen( #[values(false, true)] is_full_schema: bool, #[values(true, false)] enable_stable_row_ids: bool, - #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] + #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1, LanceFileVersion::V2_2)] data_storage_version: LanceFileVersion, ) { let test_uri = format!(