From a98e40342161cb8d96d150f7443be0e0ffd55ebb Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 09:40:03 -0400 Subject: [PATCH 01/13] Update Parquet versions and add tests --- cpp/src/parquet/metadata.cc | 8 ++++++++ cpp/src/parquet/metadata_test.cc | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 4b1822c0dae..9bef4a4ab87 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -83,6 +83,14 @@ std::string ParquetVersionToString(ParquetVersion::type ver) { return "2.4"; case ParquetVersion::PARQUET_2_6: return "2.6"; + case ParquetVersion::PARQUET_2_7: + return "2.7"; + case ParquetVersion::PARQUET_2_8: + return "2.8"; + case ParquetVersion::PARQUET_2_9: + return "2.9"; + case ParquetVersion::PARQUET_2_10: + return "2.10"; } // This should be unreachable diff --git a/cpp/src/parquet/metadata_test.cc b/cpp/src/parquet/metadata_test.cc index 572f053179c..8e00b847da9 100644 --- a/cpp/src/parquet/metadata_test.cc +++ b/cpp/src/parquet/metadata_test.cc @@ -745,5 +745,15 @@ TEST(ApplicationVersion, FullWithSpaces) { ASSERT_EQ("cd", version.version.build_info); } +TEST(ParquetVersionToString, AllVersions) { + ASSERT_EQ("1.0", ParquetVersionToString(ParquetVersion::PARQUET_1_0)); + ASSERT_EQ("2.4", ParquetVersionToString(ParquetVersion::PARQUET_2_4)); + ASSERT_EQ("2.6", ParquetVersionToString(ParquetVersion::PARQUET_2_6)); + ASSERT_EQ("2.7", ParquetVersionToString(ParquetVersion::PARQUET_2_7)); + ASSERT_EQ("2.8", ParquetVersionToString(ParquetVersion::PARQUET_2_8)); + ASSERT_EQ("2.9", ParquetVersionToString(ParquetVersion::PARQUET_2_9)); + ASSERT_EQ("2.10", ParquetVersionToString(ParquetVersion::PARQUET_2_10)); +} + } // namespace metadata } // namespace parquet From 902d73000eebe585519ffcd850aa99174b09990c Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 09:41:05 -0400 Subject: [PATCH 02/13] Add type enum content --- cpp/src/parquet/type_fwd.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/type_fwd.h b/cpp/src/parquet/type_fwd.h index 02e896598bf..16f74923b52 100644 --- a/cpp/src/parquet/type_fwd.h +++ b/cpp/src/parquet/type_fwd.h @@ -54,11 +54,43 @@ struct ParquetVersion { /// Note: Parquet format 2.6.0 was released in September 2018. PARQUET_2_6, + /// Enable Parquet format 2.7 and earlier features when writing + /// + /// This enables bloom filters and encryption in addition to the + /// PARQUET_2_6 features. + /// + /// Note: Parquet format 2.7.0 was released in June 2019. + PARQUET_2_7, + + /// Enable Parquet format 2.8 and earlier features when writing + /// + /// This enables BYTE_STREAM_SPLIT encoding in addition to the + /// PARQUET_2_7 features. + /// + /// Note: Parquet format 2.8.0 was released in February 2020. + PARQUET_2_8, + + /// Enable Parquet format 2.9 and earlier features when writing + /// + /// This enables interoperable LZ4 codec in addition to the + /// PARQUET_2_8 features. + /// + /// Note: Parquet format 2.9.0 was released in January 2021. + PARQUET_2_9, + + /// Enable Parquet format 2.10 and earlier features when writing + /// + /// This enables Float16 logical type in addition to the + /// PARQUET_2_9 features. + /// + /// Note: Parquet format 2.10.0 was released in October 2022. + PARQUET_2_10, + /// Enable latest Parquet format 2.x features /// /// This value is equal to the greatest 2.x version supported by /// this library. - PARQUET_2_LATEST = PARQUET_2_6 + PARQUET_2_LATEST = PARQUET_2_10 }; }; From 30f1a6f4e3bc436ec4f5706c01bf02381a7e3108 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 09:50:58 -0400 Subject: [PATCH 03/13] Add new versions to PyArrow --- python/pyarrow/includes/libparquet.pxd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyarrow/includes/libparquet.pxd b/python/pyarrow/includes/libparquet.pxd index d9dd9d1aec9..d1f4cca3142 100644 --- a/python/pyarrow/includes/libparquet.pxd +++ b/python/pyarrow/includes/libparquet.pxd @@ -144,6 +144,10 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil: ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0" ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4" ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6" + ParquetVersion_V2_7" parquet::ParquetVersion::PARQUET_2_7" + ParquetVersion_V2_8" parquet::ParquetVersion::PARQUET_2_8" + ParquetVersion_V2_9" parquet::ParquetVersion::PARQUET_2_9" + ParquetVersion_V2_10" parquet::ParquetVersion::PARQUET_2_10" enum ParquetSortOrder" parquet::SortOrder::type": ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED" From c57868efc176df6e2aeb9472e8eef7451721ca6e Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 09:51:21 -0400 Subject: [PATCH 04/13] Add new versions and tests to R --- r/R/enums.R | 3 ++- r/R/parquet.R | 6 +++++- r/tests/testthat/test-parquet.R | 18 +++++++++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/r/R/enums.R b/r/R/enums.R index a28728552f8..0e3d251b9f8 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -135,7 +135,8 @@ FileType <- enum("FileType", #' @export #' @rdname enums ParquetVersionType <- enum("ParquetVersionType", - PARQUET_1_0 = 0L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L + PARQUET_1_0 = 0L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L, + PARQUET_2_7 = 4L, PARQUET_2_8 = 5L, PARQUET_2_9 = 6L, PARQUET_2_10 = 7L ) #' @export diff --git a/r/R/parquet.R b/r/R/parquet.R index 91ddfc63a29..0a1e68bb3d8 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -234,7 +234,11 @@ valid_parquet_version <- c( "1.0" = ParquetVersionType$PARQUET_1_0, "2.4" = ParquetVersionType$PARQUET_2_4, "2.6" = ParquetVersionType$PARQUET_2_6, - "latest" = ParquetVersionType$PARQUET_2_6 + "2.7" = ParquetVersionType$PARQUET_2_7, + "2.8" = ParquetVersionType$PARQUET_2_8, + "2.9" = ParquetVersionType$PARQUET_2_9, + "2.10" = ParquetVersionType$PARQUET_2_10, + "latest" = ParquetVersionType$PARQUET_2_10 ) make_valid_parquet_version <- function(version, valid_versions = valid_parquet_version) { diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 3cf3786a410..84d740609d1 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -142,9 +142,25 @@ test_that("make_valid_parquet_version()", { make_valid_parquet_version("2.6"), ParquetVersionType$PARQUET_2_6 ) + expect_equal( + make_valid_parquet_version("2.7"), + ParquetVersionType$PARQUET_2_7 + ) + expect_equal( + make_valid_parquet_version("2.8"), + ParquetVersionType$PARQUET_2_8 + ) + expect_equal( + make_valid_parquet_version("2.9"), + ParquetVersionType$PARQUET_2_9 + ) + expect_equal( + make_valid_parquet_version("2.10"), + ParquetVersionType$PARQUET_2_10 + ) expect_equal( make_valid_parquet_version("latest"), - ParquetVersionType$PARQUET_2_6 + ParquetVersionType$PARQUET_2_10 ) expect_equal(make_valid_parquet_version(1), ParquetVersionType$PARQUET_1_0) From 2df44a811f1adf5b47e3d04a5ed9ae0e49eb550d Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 10:07:42 -0400 Subject: [PATCH 05/13] add 11 and 12 to python too --- cpp/src/parquet/metadata.cc | 4 ++++ cpp/src/parquet/metadata_test.cc | 2 ++ cpp/src/parquet/type_fwd.h | 19 ++++++++++++++++++- r/R/enums.R | 3 ++- r/R/parquet.R | 4 +++- r/tests/testthat/test-parquet.R | 10 +++++++++- 6 files changed, 38 insertions(+), 4 deletions(-) diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 9bef4a4ab87..a86673e489e 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -91,6 +91,10 @@ std::string ParquetVersionToString(ParquetVersion::type ver) { return "2.9"; case ParquetVersion::PARQUET_2_10: return "2.10"; + case ParquetVersion::PARQUET_2_11: + return "2.11"; + case ParquetVersion::PARQUET_2_12: + return "2.12"; } // This should be unreachable diff --git a/cpp/src/parquet/metadata_test.cc b/cpp/src/parquet/metadata_test.cc index 8e00b847da9..302162bbe23 100644 --- a/cpp/src/parquet/metadata_test.cc +++ b/cpp/src/parquet/metadata_test.cc @@ -753,6 +753,8 @@ TEST(ParquetVersionToString, AllVersions) { ASSERT_EQ("2.8", ParquetVersionToString(ParquetVersion::PARQUET_2_8)); ASSERT_EQ("2.9", ParquetVersionToString(ParquetVersion::PARQUET_2_9)); ASSERT_EQ("2.10", ParquetVersionToString(ParquetVersion::PARQUET_2_10)); + ASSERT_EQ("2.11", ParquetVersionToString(ParquetVersion::PARQUET_2_11)); + ASSERT_EQ("2.12", ParquetVersionToString(ParquetVersion::PARQUET_2_12)); } } // namespace metadata diff --git a/cpp/src/parquet/type_fwd.h b/cpp/src/parquet/type_fwd.h index 16f74923b52..24b3c12a21e 100644 --- a/cpp/src/parquet/type_fwd.h +++ b/cpp/src/parquet/type_fwd.h @@ -86,11 +86,28 @@ struct ParquetVersion { /// Note: Parquet format 2.10.0 was released in October 2022. PARQUET_2_10, + /// Enable Parquet format 2.11 and earlier features when writing + /// + /// This enables VARIANT logical type, GEOMETRY/GEOGRAPHY types, + /// and extended BYTE_STREAM_SPLIT encoding for INT32/INT64/FIXED_LEN_BYTE_ARRAY + /// in addition to the PARQUET_2_10 features. + /// + /// Note: Parquet format 2.11.0 was released in March 2025. + PARQUET_2_11, + + /// Enable Parquet format 2.12 and earlier features when writing + /// + /// This finalizes the VARIANT logical type specification and shredding + /// in addition to the PARQUET_2_11 features. + /// + /// Note: Parquet format 2.12.0 was released in August 2025. + PARQUET_2_12, + /// Enable latest Parquet format 2.x features /// /// This value is equal to the greatest 2.x version supported by /// this library. - PARQUET_2_LATEST = PARQUET_2_10 + PARQUET_2_LATEST = PARQUET_2_12 }; }; diff --git a/r/R/enums.R b/r/R/enums.R index 0e3d251b9f8..bbab69d2b36 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -136,7 +136,8 @@ FileType <- enum("FileType", #' @rdname enums ParquetVersionType <- enum("ParquetVersionType", PARQUET_1_0 = 0L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L, - PARQUET_2_7 = 4L, PARQUET_2_8 = 5L, PARQUET_2_9 = 6L, PARQUET_2_10 = 7L + PARQUET_2_7 = 4L, PARQUET_2_8 = 5L, PARQUET_2_9 = 6L, PARQUET_2_10 = 7L, + PARQUET_2_11 = 8L, PARQUET_2_12 = 9L ) #' @export diff --git a/r/R/parquet.R b/r/R/parquet.R index 0a1e68bb3d8..6cf08567dd3 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -238,7 +238,9 @@ valid_parquet_version <- c( "2.8" = ParquetVersionType$PARQUET_2_8, "2.9" = ParquetVersionType$PARQUET_2_9, "2.10" = ParquetVersionType$PARQUET_2_10, - "latest" = ParquetVersionType$PARQUET_2_10 + "2.11" = ParquetVersionType$PARQUET_2_11, + "2.12" = ParquetVersionType$PARQUET_2_12, + "latest" = ParquetVersionType$PARQUET_2_12 ) make_valid_parquet_version <- function(version, valid_versions = valid_parquet_version) { diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 84d740609d1..a2ad25d8ede 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -158,9 +158,17 @@ test_that("make_valid_parquet_version()", { make_valid_parquet_version("2.10"), ParquetVersionType$PARQUET_2_10 ) + expect_equal( + make_valid_parquet_version("2.11"), + ParquetVersionType$PARQUET_2_11 + ) + expect_equal( + make_valid_parquet_version("2.12"), + ParquetVersionType$PARQUET_2_12 + ) expect_equal( make_valid_parquet_version("latest"), - ParquetVersionType$PARQUET_2_10 + ParquetVersionType$PARQUET_2_12 ) expect_equal(make_valid_parquet_version(1), ParquetVersionType$PARQUET_1_0) From 0b077673519f567c9eca6d88fac8db1315c5c185 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 15:29:05 +0100 Subject: [PATCH 06/13] add 11 and 12 to python --- python/pyarrow/includes/libparquet.pxd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/includes/libparquet.pxd b/python/pyarrow/includes/libparquet.pxd index d1f4cca3142..df25b85b9e6 100644 --- a/python/pyarrow/includes/libparquet.pxd +++ b/python/pyarrow/includes/libparquet.pxd @@ -148,6 +148,8 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil: ParquetVersion_V2_8" parquet::ParquetVersion::PARQUET_2_8" ParquetVersion_V2_9" parquet::ParquetVersion::PARQUET_2_9" ParquetVersion_V2_10" parquet::ParquetVersion::PARQUET_2_10" + ParquetVersion_V2_11" parquet::ParquetVersion::PARQUET_2_11" + ParquetVersion_V2_12" parquet::ParquetVersion::PARQUET_2_12" enum ParquetSortOrder" parquet::SortOrder::type": ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED" From e82d8a2320844096717968b348de325b07f546e4 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 17:12:08 +0100 Subject: [PATCH 07/13] Update test to use parquet_latest --- cpp/src/parquet/metadata_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/metadata_test.cc b/cpp/src/parquet/metadata_test.cc index 302162bbe23..be7a59ae29d 100644 --- a/cpp/src/parquet/metadata_test.cc +++ b/cpp/src/parquet/metadata_test.cc @@ -94,7 +94,7 @@ TEST(Metadata, TestBuildAccess) { WriterProperties::Builder prop_builder; std::shared_ptr props = - prop_builder.version(ParquetVersion::PARQUET_2_6)->build(); + prop_builder.version(ParquetVersion::PARQUET_2_LATEST)->build(); fields.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED)); fields.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED)); @@ -137,7 +137,7 @@ TEST(Metadata, TestBuildAccess) { ASSERT_EQ(nrows, f_accessors[loop_index]->num_rows()); ASSERT_LE(0, static_cast(f_accessors[loop_index]->size())); ASSERT_EQ(2, f_accessors[loop_index]->num_row_groups()); - ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessors[loop_index]->version()); + ASSERT_EQ(ParquetVersion::PARQUET_2_LATEST, f_accessors[loop_index]->version()); ASSERT_EQ(DEFAULT_CREATED_BY, f_accessors[loop_index]->created_by()); ASSERT_EQ(3, f_accessors[loop_index]->num_schema_elements()); @@ -256,7 +256,7 @@ TEST(Metadata, TestBuildAccess) { ASSERT_EQ(4, f_accessor->num_row_groups()); ASSERT_EQ(nrows * 2, f_accessor->num_rows()); ASSERT_LE(0, static_cast(f_accessor->size())); - ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessor->version()); + ASSERT_EQ(ParquetVersion::PARQUET_2_LATEST, f_accessor->version()); ASSERT_EQ(DEFAULT_CREATED_BY, f_accessor->created_by()); ASSERT_EQ(3, f_accessor->num_schema_elements()); From a0d7ee64de70b3cc434afdbf814c9b2ac48f5ff4 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 17:12:39 +0100 Subject: [PATCH 08/13] Update R enums to match C++ --- r/R/enums.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/R/enums.R b/r/R/enums.R index bbab69d2b36..e5097cc0519 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -135,9 +135,9 @@ FileType <- enum("FileType", #' @export #' @rdname enums ParquetVersionType <- enum("ParquetVersionType", - PARQUET_1_0 = 0L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L, - PARQUET_2_7 = 4L, PARQUET_2_8 = 5L, PARQUET_2_9 = 6L, PARQUET_2_10 = 7L, - PARQUET_2_11 = 8L, PARQUET_2_12 = 9L + PARQUET_1_0 = 0L, PARQUET_2_4 = 1L, PARQUET_2_6 = 2L, + PARQUET_2_7 = 3L, PARQUET_2_8 = 4L, PARQUET_2_9 = 5L, PARQUET_2_10 = 6L, + PARQUET_2_11 = 7L, PARQUET_2_12 = 8L ) #' @export From c7f23268dc958ccdae260326251665f07287f2d6 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 18:16:35 +0100 Subject: [PATCH 09/13] Add references to latest Python --- python/pyarrow/_parquet.pyx | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index d59c70a2744..11ce81934d9 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1113,7 +1113,7 @@ cdef class FileMetaData(_Weakrefable): """ Parquet format version used in file (str, such as '1.0', '2.4'). - If version is missing or unparsable, will default to assuming '2.6'. + If version is missing or unparsable, will default to assuming '2.12'. """ cdef ParquetVersion version = self._metadata.version() if version == ParquetVersion_V1: @@ -1122,9 +1122,21 @@ cdef class FileMetaData(_Weakrefable): return '2.4' elif version == ParquetVersion_V2_6: return '2.6' + elif version == ParquetVersion_V2_7: + return '2.7' + elif version == ParquetVersion_V2_8: + return '2.8' + elif version == ParquetVersion_V2_9: + return '2.9' + elif version == ParquetVersion_V2_10: + return '2.10' + elif version == ParquetVersion_V2_11: + return '2.11' + elif version == ParquetVersion_V2_12: + return '2.12' else: - warnings.warn(f'Unrecognized file version, assuming 2.6: {version}') - return '2.6' + warnings.warn(f'Unrecognized file version, assuming 2.12: {version}') + return '2.12' @property def created_by(self): From 838abf5f65b92aa823cd0f63ba1ec452dcc3464a Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 19:26:25 +0100 Subject: [PATCH 10/13] Update Python tests to refer to latest Parquet --- python/pyarrow/tests/parquet/test_metadata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 148bfebaa67..241177020e3 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -67,7 +67,7 @@ def test_parquet_metadata_api(): assert meta.num_rows == len(df) assert meta.num_columns == ncols + 1 # +1 for index assert meta.num_row_groups == 1 - assert meta.format_version == '2.6' + assert meta.format_version == '2.12' assert 'parquet-cpp' in meta.created_by assert isinstance(meta.serialized_size, int) assert isinstance(meta.metadata, dict) @@ -554,12 +554,12 @@ def test_write_metadata(tempdir): assert b'ARROW:schema' not in schema_as_arrow.metadata # pass through writer keyword arguments - for version in ["1.0", "2.4", "2.6"]: + for version in ["1.0", "2.4", "2.6", "2.7", "2.8", "2.9", "2.10", "2.11", "2.12"]: pq.write_metadata(schema, path, version=version) parquet_meta = pq.read_metadata(path) # The version is stored as a single integer in the Parquet metadata, # so it cannot correctly express dotted format versions - expected_version = "1.0" if version == "1.0" else "2.6" + expected_version = "1.0" if version == "1.0" else "2.12" assert parquet_meta.format_version == expected_version # metadata_collector: list of FileMetaData objects From 5a657970c1f862d632e52fec32ef547cbc24ebe3 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 19:27:27 +0100 Subject: [PATCH 11/13] Update cpp/src/parquet/metadata_test.cc --- cpp/src/parquet/metadata_test.cc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cpp/src/parquet/metadata_test.cc b/cpp/src/parquet/metadata_test.cc index be7a59ae29d..1f0101bf298 100644 --- a/cpp/src/parquet/metadata_test.cc +++ b/cpp/src/parquet/metadata_test.cc @@ -745,17 +745,5 @@ TEST(ApplicationVersion, FullWithSpaces) { ASSERT_EQ("cd", version.version.build_info); } -TEST(ParquetVersionToString, AllVersions) { - ASSERT_EQ("1.0", ParquetVersionToString(ParquetVersion::PARQUET_1_0)); - ASSERT_EQ("2.4", ParquetVersionToString(ParquetVersion::PARQUET_2_4)); - ASSERT_EQ("2.6", ParquetVersionToString(ParquetVersion::PARQUET_2_6)); - ASSERT_EQ("2.7", ParquetVersionToString(ParquetVersion::PARQUET_2_7)); - ASSERT_EQ("2.8", ParquetVersionToString(ParquetVersion::PARQUET_2_8)); - ASSERT_EQ("2.9", ParquetVersionToString(ParquetVersion::PARQUET_2_9)); - ASSERT_EQ("2.10", ParquetVersionToString(ParquetVersion::PARQUET_2_10)); - ASSERT_EQ("2.11", ParquetVersionToString(ParquetVersion::PARQUET_2_11)); - ASSERT_EQ("2.12", ParquetVersionToString(ParquetVersion::PARQUET_2_12)); -} - } // namespace metadata } // namespace parquet From 7b634fe22e66bb266f3d0c22ee0466cc6e7b9e12 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 19:40:49 +0100 Subject: [PATCH 12/13] Update default version assumption --- python/pyarrow/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 32bcebb28de..a00b345295b 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -4935,7 +4935,7 @@ def test_write_dataset_parquet(tempdir): base_dir = tempdir / f'parquet_dataset_version{version}' ds.write_dataset(table, base_dir, format=format, file_options=opts) meta = pq.read_metadata(base_dir / "part-0.parquet") - expected_version = "1.0" if version == "1.0" else "2.6" + expected_version = "1.0" if version == "1.0" else "2.12" assert meta.format_version == expected_version # ensure version is actually honored based on supported datatypes From 2160b675ae5a5adc8f884b5ea39f65664f29a6ee Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 22 Sep 2025 19:41:32 +0100 Subject: [PATCH 13/13] Update other Parquet version test --- python/pyarrow/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index a00b345295b..658f0d8a65d 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -4928,7 +4928,7 @@ def test_write_dataset_parquet(tempdir): assert result.equals(table) # using custom options - for version in ["1.0", "2.4", "2.6"]: + for version in ["1.0", "2.4", "2.6", "2.7", "2.8", "2.9", "2.10", "2.11", "2.12"]: format = ds.ParquetFileFormat() opts = format.make_write_options(version=version) assert "