Skip to content
Draft
12 changes: 12 additions & 0 deletions cpp/src/parquet/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,18 @@ std::string ParquetVersionToString(ParquetVersion::type ver) {
return "2.4";
case ParquetVersion::PARQUET_2_6:
return "2.6";
case ParquetVersion::PARQUET_2_7:
return "2.7";
case ParquetVersion::PARQUET_2_8:
return "2.8";
case ParquetVersion::PARQUET_2_9:
return "2.9";
case ParquetVersion::PARQUET_2_10:
return "2.10";
case ParquetVersion::PARQUET_2_11:
return "2.11";
case ParquetVersion::PARQUET_2_12:
return "2.12";
}

// This should be unreachable
Expand Down
18 changes: 15 additions & 3 deletions cpp/src/parquet/metadata_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ TEST(Metadata, TestBuildAccess) {
WriterProperties::Builder prop_builder;

std::shared_ptr<WriterProperties> props =
prop_builder.version(ParquetVersion::PARQUET_2_6)->build();
prop_builder.version(ParquetVersion::PARQUET_2_LATEST)->build();

fields.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
fields.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
Expand Down Expand Up @@ -137,7 +137,7 @@ TEST(Metadata, TestBuildAccess) {
ASSERT_EQ(nrows, f_accessors[loop_index]->num_rows());
ASSERT_LE(0, static_cast<int>(f_accessors[loop_index]->size()));
ASSERT_EQ(2, f_accessors[loop_index]->num_row_groups());
ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessors[loop_index]->version());
ASSERT_EQ(ParquetVersion::PARQUET_2_LATEST, f_accessors[loop_index]->version());
ASSERT_EQ(DEFAULT_CREATED_BY, f_accessors[loop_index]->created_by());
ASSERT_EQ(3, f_accessors[loop_index]->num_schema_elements());

Expand Down Expand Up @@ -256,7 +256,7 @@ TEST(Metadata, TestBuildAccess) {
ASSERT_EQ(4, f_accessor->num_row_groups());
ASSERT_EQ(nrows * 2, f_accessor->num_rows());
ASSERT_LE(0, static_cast<int>(f_accessor->size()));
ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessor->version());
ASSERT_EQ(ParquetVersion::PARQUET_2_LATEST, f_accessor->version());
ASSERT_EQ(DEFAULT_CREATED_BY, f_accessor->created_by());
ASSERT_EQ(3, f_accessor->num_schema_elements());

Expand Down Expand Up @@ -745,5 +745,17 @@ TEST(ApplicationVersion, FullWithSpaces) {
ASSERT_EQ("cd", version.version.build_info);
}

// Checks the string form returned by ParquetVersionToString() for each
// explicitly named format version listed below.
TEST(ParquetVersionToString, AllVersions) {
  // One row per version: the enumerator and the text it must map to.
  struct VersionCase {
    ParquetVersion::type version;
    const char* expected;
  };
  const VersionCase kCases[] = {
      {ParquetVersion::PARQUET_1_0, "1.0"},   {ParquetVersion::PARQUET_2_4, "2.4"},
      {ParquetVersion::PARQUET_2_6, "2.6"},   {ParquetVersion::PARQUET_2_7, "2.7"},
      {ParquetVersion::PARQUET_2_8, "2.8"},   {ParquetVersion::PARQUET_2_9, "2.9"},
      {ParquetVersion::PARQUET_2_10, "2.10"}, {ParquetVersion::PARQUET_2_11, "2.11"},
      {ParquetVersion::PARQUET_2_12, "2.12"},
  };

  // ASSERT_EQ aborts the test on the first mismatch, same as the
  // one-assertion-per-line form.
  for (const VersionCase& test_case : kCases) {
    ASSERT_EQ(test_case.expected, ParquetVersionToString(test_case.version));
  }
}

} // namespace metadata
} // namespace parquet
51 changes: 50 additions & 1 deletion cpp/src/parquet/type_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,60 @@ struct ParquetVersion {
/// Note: Parquet format 2.6.0 was released in September 2018.
PARQUET_2_6,

/// Enable Parquet format 2.7 and earlier features when writing
///
/// This enables bloom filters and encryption in addition to the
/// PARQUET_2_6 features.
///
/// Note: Parquet format 2.7.0 was released in June 2019.
PARQUET_2_7,

/// Enable Parquet format 2.8 and earlier features when writing
///
/// This enables BYTE_STREAM_SPLIT encoding in addition to the
/// PARQUET_2_7 features.
///
/// Note: Parquet format 2.8.0 was released in February 2020.
PARQUET_2_8,

/// Enable Parquet format 2.9 and earlier features when writing
///
/// This enables the interoperable LZ4 codec in addition to the
/// PARQUET_2_8 features.
///
/// Note: Parquet format 2.9.0 was released in January 2021.
PARQUET_2_9,

/// Enable Parquet format 2.10 and earlier features when writing
///
/// This enables the Float16 logical type in addition to the
/// PARQUET_2_9 features.
///
/// Note: Parquet format 2.10.0 was released in October 2022.
PARQUET_2_10,

/// Enable Parquet format 2.11 and earlier features when writing
///
/// This enables the VARIANT logical type, the GEOMETRY/GEOGRAPHY types,
/// and extended BYTE_STREAM_SPLIT encoding for INT32/INT64/FIXED_LEN_BYTE_ARRAY
/// in addition to the PARQUET_2_10 features.
///
/// Note: Parquet format 2.11.0 was released in March 2025.
PARQUET_2_11,

/// Enable Parquet format 2.12 and earlier features when writing
///
/// This finalizes the VARIANT logical type specification and shredding
/// in addition to the PARQUET_2_11 features.
///
/// Note: Parquet format 2.12.0 was released in August 2025.
PARQUET_2_12,

/// Enable latest Parquet format 2.x features
///
/// This value is equal to the greatest 2.x version supported by
/// this library.
PARQUET_2_LATEST = PARQUET_2_6
PARQUET_2_LATEST = PARQUET_2_12
};
};

Expand Down
6 changes: 6 additions & 0 deletions python/pyarrow/includes/libparquet.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"
ParquetVersion_V2_7" parquet::ParquetVersion::PARQUET_2_7"
ParquetVersion_V2_8" parquet::ParquetVersion::PARQUET_2_8"
ParquetVersion_V2_9" parquet::ParquetVersion::PARQUET_2_9"
ParquetVersion_V2_10" parquet::ParquetVersion::PARQUET_2_10"
ParquetVersion_V2_11" parquet::ParquetVersion::PARQUET_2_11"
ParquetVersion_V2_12" parquet::ParquetVersion::PARQUET_2_12"

enum ParquetSortOrder" parquet::SortOrder::type":
ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
Expand Down
4 changes: 3 additions & 1 deletion r/R/enums.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ FileType <- enum("FileType",
#' @export
#' @rdname enums
ParquetVersionType <- enum("ParquetVersionType",
PARQUET_1_0 = 0L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L
PARQUET_1_0 = 0L, PARQUET_2_4 = 1L, PARQUET_2_6 = 2L,
PARQUET_2_7 = 3L, PARQUET_2_8 = 4L, PARQUET_2_9 = 5L, PARQUET_2_10 = 6L,
PARQUET_2_11 = 7L, PARQUET_2_12 = 8L
Comment on lines +138 to +140
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently this change makes the R enum values match the C++ enum values; I'm not sure about this myself.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonkeane If we export this enum, does that mean we shouldn't go changing previous values, or that it's actually important to sync up with the C++ values? I'm not confident I fully understand how enums are used here and what matches what.

)

#' @export
Expand Down
8 changes: 7 additions & 1 deletion r/R/parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,13 @@ valid_parquet_version <- c(
"1.0" = ParquetVersionType$PARQUET_1_0,
"2.4" = ParquetVersionType$PARQUET_2_4,
"2.6" = ParquetVersionType$PARQUET_2_6,
"latest" = ParquetVersionType$PARQUET_2_6
"2.7" = ParquetVersionType$PARQUET_2_7,
"2.8" = ParquetVersionType$PARQUET_2_8,
"2.9" = ParquetVersionType$PARQUET_2_9,
"2.10" = ParquetVersionType$PARQUET_2_10,
"2.11" = ParquetVersionType$PARQUET_2_11,
"2.12" = ParquetVersionType$PARQUET_2_12,
"latest" = ParquetVersionType$PARQUET_2_12
)

make_valid_parquet_version <- function(version, valid_versions = valid_parquet_version) {
Expand Down
26 changes: 25 additions & 1 deletion r/tests/testthat/test-parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,33 @@ test_that("make_valid_parquet_version()", {
make_valid_parquet_version("2.6"),
ParquetVersionType$PARQUET_2_6
)
expect_equal(
make_valid_parquet_version("2.7"),
ParquetVersionType$PARQUET_2_7
)
expect_equal(
make_valid_parquet_version("2.8"),
ParquetVersionType$PARQUET_2_8
)
expect_equal(
make_valid_parquet_version("2.9"),
ParquetVersionType$PARQUET_2_9
)
expect_equal(
make_valid_parquet_version("2.10"),
ParquetVersionType$PARQUET_2_10
)
expect_equal(
make_valid_parquet_version("2.11"),
ParquetVersionType$PARQUET_2_11
)
expect_equal(
make_valid_parquet_version("2.12"),
ParquetVersionType$PARQUET_2_12
)
expect_equal(
make_valid_parquet_version("latest"),
ParquetVersionType$PARQUET_2_6
ParquetVersionType$PARQUET_2_12
)

expect_equal(make_valid_parquet_version(1), ParquetVersionType$PARQUET_1_0)
Expand Down
Loading