Skip to content

Commit

Permalink
Rename ColumnOptions to ParquetColumnOptions (#11512)
Browse files Browse the repository at this point in the history
* Rename `ColumnOptions` to `ParquetColumnOptions`

* Update error message
  • Loading branch information
alamb committed Jul 29, 2024
1 parent 75ced38 commit c50fd88
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 41 deletions.
4 changes: 2 additions & 2 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,7 @@ pub struct TableParquetOptions {
/// Global Parquet options that propagates to all columns.
pub global: ParquetOptions,
/// Column specific options. Default usage is parquet.XX::column.
pub column_specific_options: HashMap<String, ColumnOptions>,
pub column_specific_options: HashMap<String, ParquetColumnOptions>,
/// Additional file-level metadata to include. Inserted into the key_value_metadata
/// for the written [`FileMetaData`](https://docs.rs/parquet/latest/parquet/file/metadata/struct.FileMetaData.html).
///
Expand Down Expand Up @@ -1555,7 +1555,7 @@ config_namespace_with_hashmap! {
/// Options controlling parquet format for individual columns.
///
/// See [`ParquetOptions`] for more details
pub struct ColumnOptions {
pub struct ParquetColumnOptions {
/// Sets if bloom filter is enabled for the column path.
pub bloom_filter_enabled: Option<bool>, default = None

Expand Down
10 changes: 5 additions & 5 deletions datafusion/common/src/file_options/parquet_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ mod tests {
};
use std::collections::HashMap;

use crate::config::{ColumnOptions, ParquetOptions};
use crate::config::{ParquetColumnOptions, ParquetOptions};

use super::*;

Expand All @@ -388,8 +388,8 @@ mod tests {
/// Take the column defaults provided in [`ParquetOptions`], and generate a non-default col config.
fn column_options_with_non_defaults(
src_col_defaults: &ParquetOptions,
) -> ColumnOptions {
ColumnOptions {
) -> ParquetColumnOptions {
ParquetColumnOptions {
compression: Some("zstd(22)".into()),
dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v),
statistics_enabled: Some("none".into()),
Expand Down Expand Up @@ -446,10 +446,10 @@ mod tests {
fn extract_column_options(
props: &WriterProperties,
col: ColumnPath,
) -> ColumnOptions {
) -> ParquetColumnOptions {
let bloom_filter_default_props = props.bloom_filter_properties(&col);

ColumnOptions {
ParquetColumnOptions {
bloom_filter_enabled: Some(bloom_filter_default_props.is_some()),
encoding: props.encoding(&col).map(|s| s.to_string()),
dictionary_enabled: Some(props.dictionary_enabled(&col)),
Expand Down
8 changes: 4 additions & 4 deletions datafusion/proto-common/proto/datafusion_common.proto
Original file line number Diff line number Diff line change
Expand Up @@ -433,15 +433,15 @@ message JsonOptions {

message TableParquetOptions {
ParquetOptions global = 1;
repeated ColumnSpecificOptions column_specific_options = 2;
repeated ParquetColumnSpecificOptions column_specific_options = 2;
}

message ColumnSpecificOptions {
message ParquetColumnSpecificOptions {
string column_name = 1;
ColumnOptions options = 2;
ParquetColumnOptions options = 2;
}

message ColumnOptions {
message ParquetColumnOptions {
oneof bloom_filter_enabled_opt {
bool bloom_filter_enabled = 1;
}
Expand Down
10 changes: 6 additions & 4 deletions datafusion/proto-common/src/from_proto/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ use arrow::ipc::{reader::read_record_batch, root_as_message};
use datafusion_common::{
arrow_datafusion_err,
config::{
ColumnOptions, CsvOptions, JsonOptions, ParquetOptions, TableParquetOptions,
CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions,
TableParquetOptions,
},
file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions},
parsers::CompressionTypeVariant,
Expand Down Expand Up @@ -960,12 +961,12 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions {
}
}

impl TryFrom<&protobuf::ColumnOptions> for ColumnOptions {
impl TryFrom<&protobuf::ColumnOptions> for ParquetColumnOptions {
type Error = DataFusionError;
fn try_from(
value: &protobuf::ColumnOptions,
) -> datafusion_common::Result<Self, Self::Error> {
Ok(ColumnOptions {
Ok(ParquetColumnOptions {
compression: value.compression_opt.clone().map(|opt| match opt {
protobuf::column_options::CompressionOpt::Compression(v) => Some(v),
}).unwrap_or(None),
Expand Down Expand Up @@ -1013,7 +1014,8 @@ impl TryFrom<&protobuf::TableParquetOptions> for TableParquetOptions {
fn try_from(
value: &protobuf::TableParquetOptions,
) -> datafusion_common::Result<Self, Self::Error> {
let mut column_specific_options: HashMap<String, ColumnOptions> = HashMap::new();
let mut column_specific_options: HashMap<String, ParquetColumnOptions> =
HashMap::new();
for protobuf::ColumnSpecificOptions {
column_name,
options: maybe_options,
Expand Down
9 changes: 6 additions & 3 deletions datafusion/proto-common/src/to_proto/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ use arrow::datatypes::{
use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator};
use datafusion_common::{
config::{
ColumnOptions, CsvOptions, JsonOptions, ParquetOptions, TableParquetOptions,
CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions,
TableParquetOptions,
},
file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions},
parsers::CompressionTypeVariant,
Expand Down Expand Up @@ -830,10 +831,12 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions {
}
}

impl TryFrom<&ColumnOptions> for protobuf::ColumnOptions {
impl TryFrom<&ParquetColumnOptions> for protobuf::ColumnOptions {
type Error = DataFusionError;

fn try_from(value: &ColumnOptions) -> datafusion_common::Result<Self, Self::Error> {
fn try_from(
value: &ParquetColumnOptions,
) -> datafusion_common::Result<Self, Self::Error> {
Ok(protobuf::ColumnOptions {
compression_opt: value
.compression
Expand Down
48 changes: 26 additions & 22 deletions datafusion/proto/src/generated/datafusion_proto_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -670,46 +670,50 @@ pub struct TableParquetOptions {
#[prost(message, optional, tag = "1")]
pub global: ::core::option::Option<ParquetOptions>,
#[prost(message, repeated, tag = "2")]
pub column_specific_options: ::prost::alloc::vec::Vec<ColumnSpecificOptions>,
pub column_specific_options: ::prost::alloc::vec::Vec<ParquetColumnSpecificOptions>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ColumnSpecificOptions {
pub struct ParquetColumnSpecificOptions {
#[prost(string, tag = "1")]
pub column_name: ::prost::alloc::string::String,
#[prost(message, optional, tag = "2")]
pub options: ::core::option::Option<ColumnOptions>,
pub options: ::core::option::Option<ParquetColumnOptions>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ColumnOptions {
#[prost(oneof = "column_options::BloomFilterEnabledOpt", tags = "1")]
pub struct ParquetColumnOptions {
#[prost(oneof = "parquet_column_options::BloomFilterEnabledOpt", tags = "1")]
pub bloom_filter_enabled_opt: ::core::option::Option<
column_options::BloomFilterEnabledOpt,
parquet_column_options::BloomFilterEnabledOpt,
>,
#[prost(oneof = "column_options::EncodingOpt", tags = "2")]
pub encoding_opt: ::core::option::Option<column_options::EncodingOpt>,
#[prost(oneof = "column_options::DictionaryEnabledOpt", tags = "3")]
#[prost(oneof = "parquet_column_options::EncodingOpt", tags = "2")]
pub encoding_opt: ::core::option::Option<parquet_column_options::EncodingOpt>,
#[prost(oneof = "parquet_column_options::DictionaryEnabledOpt", tags = "3")]
pub dictionary_enabled_opt: ::core::option::Option<
column_options::DictionaryEnabledOpt,
parquet_column_options::DictionaryEnabledOpt,
>,
#[prost(oneof = "column_options::CompressionOpt", tags = "4")]
pub compression_opt: ::core::option::Option<column_options::CompressionOpt>,
#[prost(oneof = "column_options::StatisticsEnabledOpt", tags = "5")]
#[prost(oneof = "parquet_column_options::CompressionOpt", tags = "4")]
pub compression_opt: ::core::option::Option<parquet_column_options::CompressionOpt>,
#[prost(oneof = "parquet_column_options::StatisticsEnabledOpt", tags = "5")]
pub statistics_enabled_opt: ::core::option::Option<
column_options::StatisticsEnabledOpt,
parquet_column_options::StatisticsEnabledOpt,
>,
#[prost(oneof = "column_options::BloomFilterFppOpt", tags = "6")]
pub bloom_filter_fpp_opt: ::core::option::Option<column_options::BloomFilterFppOpt>,
#[prost(oneof = "column_options::BloomFilterNdvOpt", tags = "7")]
pub bloom_filter_ndv_opt: ::core::option::Option<column_options::BloomFilterNdvOpt>,
#[prost(oneof = "column_options::MaxStatisticsSizeOpt", tags = "8")]
#[prost(oneof = "parquet_column_options::BloomFilterFppOpt", tags = "6")]
pub bloom_filter_fpp_opt: ::core::option::Option<
parquet_column_options::BloomFilterFppOpt,
>,
#[prost(oneof = "parquet_column_options::BloomFilterNdvOpt", tags = "7")]
pub bloom_filter_ndv_opt: ::core::option::Option<
parquet_column_options::BloomFilterNdvOpt,
>,
#[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")]
pub max_statistics_size_opt: ::core::option::Option<
column_options::MaxStatisticsSizeOpt,
parquet_column_options::MaxStatisticsSizeOpt,
>,
}
/// Nested message and enum types in `ColumnOptions`.
pub mod column_options {
/// Nested message and enum types in `ParquetColumnOptions`.
pub mod parquet_column_options {
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Oneof)]
pub enum BloomFilterEnabledOpt {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/copy.slt
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ OPTIONS (
)

# errors for invalid property (not stating `format.metadata`)
statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata" not found on ColumnOptions
statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata" not found on ParquetColumnOptions
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
STORED AS PARQUET
Expand Down

0 comments on commit c50fd88

Please sign in to comment.