diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 8af71d5abbb3..2b932b26cad6 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1414,7 +1414,7 @@ pub struct TableParquetOptions { /// Global Parquet options that propagates to all columns. pub global: ParquetOptions, /// Column specific options. Default usage is parquet.XX::column. - pub column_specific_options: HashMap, + pub column_specific_options: HashMap, /// Additional file-level metadata to include. Inserted into the key_value_metadata /// for the written [`FileMetaData`](https://docs.rs/parquet/latest/parquet/file/metadata/struct.FileMetaData.html). /// @@ -1555,7 +1555,7 @@ config_namespace_with_hashmap! { /// Options controlling parquet format for individual columns. /// /// See [`ParquetOptions`] for more details - pub struct ColumnOptions { + pub struct ParquetColumnOptions { /// Sets if bloom filter is enabled for the column path. pub bloom_filter_enabled: Option, default = None diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 34b7379823f8..80b751858398 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -379,7 +379,7 @@ mod tests { }; use std::collections::HashMap; - use crate::config::{ColumnOptions, ParquetOptions}; + use crate::config::{ParquetColumnOptions, ParquetOptions}; use super::*; @@ -388,8 +388,8 @@ mod tests { /// Take the column defaults provided in [`ParquetOptions`], and generate a non-default col config. fn column_options_with_non_defaults( src_col_defaults: &ParquetOptions, - ) -> ColumnOptions { - ColumnOptions { + ) -> ParquetColumnOptions { + ParquetColumnOptions { compression: Some("zstd(22)".into()), dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v), statistics_enabled: Some("none".into()), @@ -446,10 +446,10 @@ mod tests { fn extract_column_options( props: &WriterProperties, col: ColumnPath, - ) -> ColumnOptions { + ) -> ParquetColumnOptions { let bloom_filter_default_props = props.bloom_filter_properties(&col); - ColumnOptions { + ParquetColumnOptions { bloom_filter_enabled: Some(bloom_filter_default_props.is_some()), encoding: props.encoding(&col).map(|s| s.to_string()), dictionary_enabled: Some(props.dictionary_enabled(&col)), diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 85983dddf6ae..752f2cf76873 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -433,15 +433,15 @@ message JsonOptions { message TableParquetOptions { ParquetOptions global = 1; - repeated ColumnSpecificOptions column_specific_options = 2; + repeated ParquetColumnSpecificOptions column_specific_options = 2; } -message ColumnSpecificOptions { +message ParquetColumnSpecificOptions { string column_name = 1; - ColumnOptions options = 2; + ParquetColumnOptions options = 2; } -message ColumnOptions { +message ParquetColumnOptions { oneof bloom_filter_enabled_opt { bool bloom_filter_enabled = 1; } diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 5fe9d937f7c4..21db66a12701 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -33,7 +33,8 @@ use arrow::ipc::{reader::read_record_batch, root_as_message}; use datafusion_common::{ arrow_datafusion_err, config::{ - ColumnOptions, CsvOptions, JsonOptions, ParquetOptions, TableParquetOptions, + CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions, + TableParquetOptions, }, file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, parsers::CompressionTypeVariant, @@ -960,12 +961,12 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { } } -impl TryFrom<&protobuf::ColumnOptions> for ColumnOptions { +impl TryFrom<&protobuf::ColumnOptions> for ParquetColumnOptions { type Error = DataFusionError; fn try_from( value: &protobuf::ColumnOptions, ) -> datafusion_common::Result { - Ok(ColumnOptions { + Ok(ParquetColumnOptions { compression: value.compression_opt.clone().map(|opt| match opt { protobuf::column_options::CompressionOpt::Compression(v) => Some(v), }).unwrap_or(None), @@ -1013,7 +1014,8 @@ impl TryFrom<&protobuf::TableParquetOptions> for TableParquetOptions { fn try_from( value: &protobuf::TableParquetOptions, ) -> datafusion_common::Result { - let mut column_specific_options: HashMap = HashMap::new(); + let mut column_specific_options: HashMap = + HashMap::new(); for protobuf::ColumnSpecificOptions { column_name, options: maybe_options, diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index c15da2895b7c..24083e8b7276 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -30,7 +30,8 @@ use arrow::datatypes::{ use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator}; use datafusion_common::{ config::{ - ColumnOptions, CsvOptions, JsonOptions, ParquetOptions, TableParquetOptions, + CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions, + TableParquetOptions, }, file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, parsers::CompressionTypeVariant, @@ -830,10 +831,12 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { } } -impl TryFrom<&ColumnOptions> for protobuf::ColumnOptions { +impl TryFrom<&ParquetColumnOptions> for protobuf::ColumnOptions { type Error = DataFusionError; - fn try_from(value: &ColumnOptions) -> datafusion_common::Result { + fn try_from( + value: &ParquetColumnOptions, + ) -> datafusion_common::Result { Ok(protobuf::ColumnOptions { compression_opt: value .compression diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index bf198a24c811..b36624e391c2 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -670,46 +670,50 @@ pub struct TableParquetOptions { #[prost(message, optional, tag = "1")] pub global: ::core::option::Option, #[prost(message, repeated, tag = "2")] - pub column_specific_options: ::prost::alloc::vec::Vec, + pub column_specific_options: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnSpecificOptions { +pub struct ParquetColumnSpecificOptions { #[prost(string, tag = "1")] pub column_name: ::prost::alloc::string::String, #[prost(message, optional, tag = "2")] - pub options: ::core::option::Option, + pub options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnOptions { - #[prost(oneof = "column_options::BloomFilterEnabledOpt", tags = "1")] +pub struct ParquetColumnOptions { + #[prost(oneof = "parquet_column_options::BloomFilterEnabledOpt", tags = "1")] pub bloom_filter_enabled_opt: ::core::option::Option< - column_options::BloomFilterEnabledOpt, + parquet_column_options::BloomFilterEnabledOpt, >, - #[prost(oneof = "column_options::EncodingOpt", tags = "2")] - pub encoding_opt: ::core::option::Option, - #[prost(oneof = "column_options::DictionaryEnabledOpt", tags = "3")] + #[prost(oneof = "parquet_column_options::EncodingOpt", tags = "2")] + pub encoding_opt: ::core::option::Option, + #[prost(oneof = "parquet_column_options::DictionaryEnabledOpt", tags = "3")] pub dictionary_enabled_opt: ::core::option::Option< - column_options::DictionaryEnabledOpt, + parquet_column_options::DictionaryEnabledOpt, >, - #[prost(oneof = "column_options::CompressionOpt", tags = "4")] - pub compression_opt: ::core::option::Option, - #[prost(oneof = "column_options::StatisticsEnabledOpt", tags = "5")] + #[prost(oneof = "parquet_column_options::CompressionOpt", tags = "4")] + pub compression_opt: ::core::option::Option, + #[prost(oneof = "parquet_column_options::StatisticsEnabledOpt", tags = "5")] pub statistics_enabled_opt: ::core::option::Option< - column_options::StatisticsEnabledOpt, + parquet_column_options::StatisticsEnabledOpt, >, - #[prost(oneof = "column_options::BloomFilterFppOpt", tags = "6")] - pub bloom_filter_fpp_opt: ::core::option::Option, - #[prost(oneof = "column_options::BloomFilterNdvOpt", tags = "7")] - pub bloom_filter_ndv_opt: ::core::option::Option, - #[prost(oneof = "column_options::MaxStatisticsSizeOpt", tags = "8")] + #[prost(oneof = "parquet_column_options::BloomFilterFppOpt", tags = "6")] + pub bloom_filter_fpp_opt: ::core::option::Option< + parquet_column_options::BloomFilterFppOpt, + >, + #[prost(oneof = "parquet_column_options::BloomFilterNdvOpt", tags = "7")] + pub bloom_filter_ndv_opt: ::core::option::Option< + parquet_column_options::BloomFilterNdvOpt, + >, + #[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")] pub max_statistics_size_opt: ::core::option::Option< - column_options::MaxStatisticsSizeOpt, + parquet_column_options::MaxStatisticsSizeOpt, >, } -/// Nested message and enum types in `ColumnOptions`. -pub mod column_options { +/// Nested message and enum types in `ParquetColumnOptions`. +pub mod parquet_column_options { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum BloomFilterEnabledOpt { diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 7af4c52c654b..ff7040926caa 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -358,7 +358,7 @@ OPTIONS ( ) # errors for invalid property (not stating `format.metadata`) -statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata" not found on ColumnOptions +statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata" not found on ParquetColumnOptions COPY source_table TO 'test_files/scratch/copy/table_with_metadata/' STORED AS PARQUET