diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 2ee69dcf1068..4bff9097d1a5 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1005,7 +1005,6 @@ mod tests { }; use arrow_select::concat::concat_batches; - use crate::arrow::arrow_reader::ArrowReaderMetadata; use crate::arrow::arrow_reader::{ ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector, @@ -1018,15 +1017,11 @@ mod tests { BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType, FloatType, Int32Type, Int64Type, Int96Type, }; - #[cfg(feature = "encryption")] - use crate::encryption::decrypt::FileDecryptionProperties; use crate::errors::Result; use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion}; use crate::file::writer::SerializedFileWriter; use crate::schema::parser::parse_message_type; use crate::schema::types::{Type, TypePtr}; - #[cfg(feature = "encryption")] - use crate::util::test_common::encryption_util::verify_encryption_test_file_read; use crate::util::test_common::rand_gen::RandGen; #[test] @@ -1855,241 +1850,6 @@ mod tests { assert!(col.value(2).is_nan()); } - #[test] - #[cfg(feature = "encryption")] - fn test_non_uniform_encryption_plaintext_footer() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); - let file = File::open(path).unwrap(); - - // There is always a footer key even with a plaintext footer, - // but this is used for signing the footer. 
- let footer_key = "0123456789012345".as_bytes(); // 128bit/16 - let column_1_key = "1234567890123450".as_bytes(); - let column_2_key = "1234567890123451".as_bytes(); - - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key.to_vec()) - .with_column_key("float_field", column_2_key.to_vec()) - .build() - .unwrap(); - - verify_encryption_test_file_read(file, decryption_properties); - } - - #[test] - #[cfg(feature = "encryption")] - fn test_non_uniform_encryption_disabled_aad_storage() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = - format!("{testdata}/encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted"); - let file = File::open(path.clone()).unwrap(); - - let footer_key = "0123456789012345".as_bytes(); // 128bit/16 - let column_1_key = "1234567890123450".as_bytes(); - let column_2_key = "1234567890123451".as_bytes(); - - // Can read successfully when providing the correct AAD prefix - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key.to_vec()) - .with_column_key("float_field", column_2_key.to_vec()) - .with_aad_prefix("tester".as_bytes().to_vec()) - .build() - .unwrap(); - - verify_encryption_test_file_read(file, decryption_properties); - - // Using wrong AAD prefix should fail - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key.to_vec()) - .with_column_key("float_field", column_2_key.to_vec()) - .with_aad_prefix("wrong_aad_prefix".as_bytes().to_vec()) - .build() - .unwrap(); - - let file = File::open(path.clone()).unwrap(); - let options = ArrowReaderOptions::default() - .with_file_decryption_properties(decryption_properties.clone()); - let result = ArrowReaderMetadata::load(&file, options.clone()); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - "Parquet 
error: Provided footer key and AAD were unable to decrypt parquet footer" - ); - - // Not providing any AAD prefix should fail as it isn't stored in the file - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key.to_vec()) - .with_column_key("float_field", column_2_key.to_vec()) - .build() - .unwrap(); - - let file = File::open(path).unwrap(); - let options = ArrowReaderOptions::default() - .with_file_decryption_properties(decryption_properties.clone()); - let result = ArrowReaderMetadata::load(&file, options.clone()); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - "Parquet error: Provided footer key and AAD were unable to decrypt parquet footer" - ); - } - - #[test] - fn test_non_uniform_encryption_plaintext_footer_without_decryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); - let file = File::open(&path).unwrap(); - - let metadata = ArrowReaderMetadata::load(&file, Default::default()).unwrap(); - let file_metadata = metadata.metadata.file_metadata(); - - assert_eq!(file_metadata.num_rows(), 50); - assert_eq!(file_metadata.schema_descr().num_columns(), 8); - assert_eq!( - file_metadata.created_by().unwrap(), - "parquet-cpp-arrow version 19.0.0-SNAPSHOT" - ); - - metadata.metadata.row_groups().iter().for_each(|rg| { - assert_eq!(rg.num_columns(), 8); - assert_eq!(rg.num_rows(), 50); - }); - - // Should be able to read unencrypted columns. Test reading one column. 
- let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); - let mask = ProjectionMask::leaves(builder.parquet_schema(), [1]); - let record_reader = builder.with_projection(mask).build().unwrap(); - - let mut row_count = 0; - for batch in record_reader { - let batch = batch.unwrap(); - row_count += batch.num_rows(); - - let time_col = batch - .column(0) - .as_primitive::(); - for (i, x) in time_col.iter().enumerate() { - assert_eq!(x.unwrap(), i as i32); - } - } - - assert_eq!(row_count, file_metadata.num_rows() as usize); - - // Reading an encrypted column should fail - let file = File::open(&path).unwrap(); - let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); - let mask = ProjectionMask::leaves(builder.parquet_schema(), [4]); - let mut record_reader = builder.with_projection(mask).build().unwrap(); - - match record_reader.next() { - Some(Err(ArrowError::ParquetError(s))) => { - assert!(s.contains("protocol error")); - } - _ => { - panic!("Expected ArrowError::ParquetError"); - } - }; - } - - #[test] - #[cfg(feature = "encryption")] - fn test_non_uniform_encryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted"); - let file = File::open(path).unwrap(); - - let footer_key = "0123456789012345".as_bytes(); // 128bit/16 - let column_1_key = "1234567890123450".as_bytes(); - let column_2_key = "1234567890123451".as_bytes(); - - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key.to_vec()) - .with_column_key("float_field", column_2_key.to_vec()) - .build() - .unwrap(); - - verify_encryption_test_file_read(file, decryption_properties); - } - - #[test] - #[cfg(feature = "encryption")] - fn test_uniform_encryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/uniform_encryption.parquet.encrypted"); - let file = 
File::open(path).unwrap(); - - let key_code: &[u8] = "0123456789012345".as_bytes(); - let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) - .build() - .unwrap(); - - verify_encryption_test_file_read(file, decryption_properties); - } - - #[test] - #[cfg(not(feature = "encryption"))] - fn test_decrypting_without_encryption_flag_fails() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/uniform_encryption.parquet.encrypted"); - let file = File::open(path).unwrap(); - - let options = ArrowReaderOptions::default(); - let result = ArrowReaderMetadata::load(&file, options.clone()); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - "Parquet error: Parquet file has an encrypted footer but the encryption feature is disabled" - ); - } - - #[test] - #[cfg(feature = "encryption")] - fn test_decrypting_without_decryption_properties_fails() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/uniform_encryption.parquet.encrypted"); - let file = File::open(path).unwrap(); - - let options = ArrowReaderOptions::default(); - let result = ArrowReaderMetadata::load(&file, options.clone()); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - "Parquet error: Parquet file has an encrypted footer but no decryption properties were provided" - ); - } - - #[test] - #[cfg(feature = "encryption")] - fn test_aes_ctr_encryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer_ctr.parquet.encrypted"); - let file = File::open(path).unwrap(); - - let footer_key = "0123456789012345".as_bytes(); - let column_1_key = "1234567890123450".as_bytes(); - let column_2_key = "1234567890123451".as_bytes(); - - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key.to_vec()) - 
.with_column_key("float_field", column_2_key.to_vec()) - .build() - .unwrap(); - - let options = - ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); - let metadata = ArrowReaderMetadata::load(&file, options); - - match metadata { - Err(crate::errors::ParquetError::NYI(s)) => { - assert!(s.contains("AES_GCM_CTR_V1")); - } - _ => { - panic!("Expected ParquetError::NYI"); - } - }; - } - #[test] fn test_read_float32_float64_byte_stream_split() { let path = format!( diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 9afd7d835528..fd49ad22934d 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -1167,17 +1167,12 @@ mod tests { use crate::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use crate::arrow::schema::parquet_to_arrow_schema_and_fields; use crate::arrow::ArrowWriter; - #[cfg(feature = "encryption")] - use crate::encryption::decrypt::FileDecryptionProperties; use crate::file::metadata::ParquetMetaDataReader; use crate::file::properties::WriterProperties; - #[cfg(feature = "encryption")] - use crate::util::test_common::encryption_util::verify_encryption_test_file_read_async; use arrow::compute::kernels::cmp::eq; use arrow::error::Result as ArrowResult; use arrow_array::builder::{ListBuilder, StringBuilder}; use arrow_array::cast::AsArray; - use arrow_array::types; use arrow_array::types::Int32Type; use arrow_array::{ Array, ArrayRef, Int32Array, Int8Array, RecordBatchReader, Scalar, StringArray, @@ -1189,7 +1184,6 @@ mod tests { use std::collections::HashMap; use std::sync::{Arc, Mutex}; use tempfile::tempfile; - use tokio::fs::File; #[derive(Clone)] struct TestReader { @@ -2459,279 +2453,4 @@ mod tests { let result = reader.try_collect::>().await.unwrap(); assert_eq!(result.len(), 1); } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn test_non_uniform_encryption_plaintext_footer() { - let testdata = 
arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - // There is always a footer key even with a plaintext footer, - // but this is used for signing the footer. - let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16 - let column_1_key = "1234567890123450".as_bytes().to_vec(); - let column_2_key = "1234567890123451".as_bytes().to_vec(); - - let decryption_properties = FileDecryptionProperties::builder(footer_key) - .with_column_key("double_field", column_1_key) - .with_column_key("float_field", column_2_key) - .build() - .unwrap(); - - verify_encryption_test_file_read_async(&mut file, decryption_properties) - .await - .unwrap(); - } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn test_misspecified_encryption_keys() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted"); - - // There is always a footer key even with a plaintext footer, - // but this is used for signing the footer. 
- let footer_key = "0123456789012345".as_bytes(); // 128bit/16 - let column_1_key = "1234567890123450".as_bytes(); - let column_2_key = "1234567890123451".as_bytes(); - - // read file with keys and check for expected error message - async fn check_for_error( - expected_message: &str, - path: &String, - footer_key: &[u8], - column_1_key: &[u8], - column_2_key: &[u8], - ) { - let mut file = File::open(&path).await.unwrap(); - - let mut decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()); - - if !column_1_key.is_empty() { - decryption_properties = - decryption_properties.with_column_key("double_field", column_1_key.to_vec()); - } - - if !column_2_key.is_empty() { - decryption_properties = - decryption_properties.with_column_key("float_field", column_2_key.to_vec()); - } - - let decryption_properties = decryption_properties.build().unwrap(); - - match verify_encryption_test_file_read_async(&mut file, decryption_properties).await { - Ok(_) => { - panic!("did not get expected error") - } - Err(e) => { - assert_eq!(e.to_string(), expected_message); - } - } - } - - // Too short footer key - check_for_error( - "Parquet error: Invalid footer key. 
Failed to create AES key", - &path, - "bad_pwd".as_bytes(), - column_1_key, - column_2_key, - ) - .await; - - // Wrong footer key - check_for_error( - "Parquet error: Provided footer key and AAD were unable to decrypt parquet footer", - &path, - "1123456789012345".as_bytes(), - column_1_key, - column_2_key, - ) - .await; - - // Missing column key - check_for_error("Parquet error: Unable to decrypt column 'double_field', perhaps the column key is wrong or missing?", - &path, footer_key, "".as_bytes(), column_2_key).await; - - // Too short column key - check_for_error( - "Parquet error: Failed to create AES key", - &path, - footer_key, - "abc".as_bytes(), - column_2_key, - ) - .await; - - // Wrong column key - check_for_error("Parquet error: Unable to decrypt column 'double_field', perhaps the column key is wrong or missing?", - &path, footer_key, "1123456789012345".as_bytes(), column_2_key).await; - - // Mixed up keys - check_for_error("Parquet error: Unable to decrypt column 'float_field', perhaps the column key is wrong or missing?", - &path, footer_key, column_2_key, column_1_key).await; - } - - #[tokio::test] - async fn test_non_uniform_encryption_plaintext_footer_without_decryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - let metadata = ArrowReaderMetadata::load_async(&mut file, Default::default()) - .await - .unwrap(); - let file_metadata = metadata.metadata.file_metadata(); - - assert_eq!(file_metadata.num_rows(), 50); - assert_eq!(file_metadata.schema_descr().num_columns(), 8); - assert_eq!( - file_metadata.created_by().unwrap(), - "parquet-cpp-arrow version 19.0.0-SNAPSHOT" - ); - - metadata.metadata.row_groups().iter().for_each(|rg| { - assert_eq!(rg.num_columns(), 8); - assert_eq!(rg.num_rows(), 50); - }); - - // Should be able to read unencrypted columns. Test reading one column. 
- let builder = ParquetRecordBatchStreamBuilder::new(file).await.unwrap(); - let mask = ProjectionMask::leaves(builder.parquet_schema(), [1]); - let record_reader = builder.with_projection(mask).build().unwrap(); - let record_batches = record_reader.try_collect::>().await.unwrap(); - - let mut row_count = 0; - for batch in record_batches { - let batch = batch; - row_count += batch.num_rows(); - - let time_col = batch - .column(0) - .as_primitive::(); - for (i, x) in time_col.iter().enumerate() { - assert_eq!(x.unwrap(), i as i32); - } - } - - assert_eq!(row_count, file_metadata.num_rows() as usize); - - // Reading an encrypted column should fail - let file = File::open(&path).await.unwrap(); - let builder = ParquetRecordBatchStreamBuilder::new(file).await.unwrap(); - let mask = ProjectionMask::leaves(builder.parquet_schema(), [4]); - let mut record_reader = builder.with_projection(mask).build().unwrap(); - - match record_reader.next().await { - Some(Err(ParquetError::ArrowError(s))) => { - assert!(s.contains("protocol error")); - } - _ => { - panic!("Expected ArrowError::ParquetError"); - } - }; - } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn test_non_uniform_encryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16 - let column_1_key = "1234567890123450".as_bytes().to_vec(); - let column_2_key = "1234567890123451".as_bytes().to_vec(); - - let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .with_column_key("double_field", column_1_key) - .with_column_key("float_field", column_2_key) - .build() - .unwrap(); - - verify_encryption_test_file_read_async(&mut file, decryption_properties) - .await - .unwrap(); - } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn 
test_uniform_encryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/uniform_encryption.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - let key_code: &[u8] = "0123456789012345".as_bytes(); - let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) - .build() - .unwrap(); - - verify_encryption_test_file_read_async(&mut file, decryption_properties) - .await - .unwrap(); - } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn test_aes_ctr_encryption() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer_ctr.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - let footer_key = "0123456789012345".as_bytes().to_vec(); - let column_1_key = "1234567890123450".as_bytes().to_vec(); - let column_2_key = "1234567890123451".as_bytes().to_vec(); - - let decryption_properties = FileDecryptionProperties::builder(footer_key) - .with_column_key("double_field", column_1_key) - .with_column_key("float_field", column_2_key) - .build() - .unwrap(); - - let options = - ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties); - let metadata = ArrowReaderMetadata::load_async(&mut file, options).await; - - match metadata { - Err(ParquetError::NYI(s)) => { - assert!(s.contains("AES_GCM_CTR_V1")); - } - _ => { - panic!("Expected ParquetError::NYI"); - } - }; - } - - #[tokio::test] - #[cfg(not(feature = "encryption"))] - async fn test_decrypting_without_encryption_flag_fails() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/uniform_encryption.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - let options = ArrowReaderOptions::new(); - let result = ArrowReaderMetadata::load_async(&mut file, options).await; - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - 
"Parquet error: Parquet file has an encrypted footer but the encryption feature is disabled" - ); - } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn test_decrypting_without_decryption_properties_fails() { - let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/uniform_encryption.parquet.encrypted"); - let mut file = File::open(&path).await.unwrap(); - - let options = ArrowReaderOptions::new(); - let result = ArrowReaderMetadata::load_async(&mut file, options).await; - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - "Parquet error: Parquet file has an encrypted footer but no decryption properties were provided" - ); - } } diff --git a/parquet/src/arrow/async_reader/store.rs b/parquet/src/arrow/async_reader/store.rs index 6922f3d1f7a3..a1e94efd1451 100644 --- a/parquet/src/arrow/async_reader/store.rs +++ b/parquet/src/arrow/async_reader/store.rs @@ -207,10 +207,8 @@ mod tests { use futures::TryStreamExt; - use crate::arrow::arrow_reader::ArrowReaderOptions; use crate::arrow::async_reader::{AsyncFileReader, ParquetObjectReader}; use crate::arrow::ParquetRecordBatchStreamBuilder; - use crate::encryption::decrypt::FileDecryptionProperties; use crate::errors::ParquetError; use arrow::util::test_util::parquet_test_data; use futures::FutureExt; @@ -230,45 +228,6 @@ mod tests { (meta, Arc::new(store) as Arc) } - #[cfg(feature = "encryption")] - async fn get_encrypted_meta_store() -> (ObjectMeta, Arc) { - let res = parquet_test_data(); - let store = LocalFileSystem::new_with_prefix(res).unwrap(); - - let meta = store - .head(&Path::from("uniform_encryption.parquet.encrypted")) - .await - .unwrap(); - - (meta, Arc::new(store) as Arc) - } - - #[tokio::test] - #[cfg(feature = "encryption")] - async fn test_encrypted() { - let (meta, store) = get_encrypted_meta_store().await; - - let key_code: &[u8] = "0123456789012345".as_bytes(); - let decryption_properties = 
FileDecryptionProperties::builder(key_code.to_vec()) - .build() - .unwrap(); - let options = - ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties); - let mut reader = ParquetObjectReader::new(store.clone(), meta.clone()); - let metadata = reader.get_metadata_with_options(&options).await.unwrap(); - - assert_eq!(metadata.num_row_groups(), 1); - - let reader = ParquetObjectReader::new(store, meta); - let builder = ParquetRecordBatchStreamBuilder::new_with_options(reader, options) - .await - .unwrap(); - let batches: Vec<_> = builder.build().unwrap().try_collect().await.unwrap(); - - assert_eq!(batches.len(), 1); - assert_eq!(batches[0].num_rows(), 50); - } - #[tokio::test] async fn test_simple() { let (meta, store) = get_meta_store().await; diff --git a/parquet/src/util/test_common/mod.rs b/parquet/src/util/test_common/mod.rs index ac36118c3702..8cfc1e6dd423 100644 --- a/parquet/src/util/test_common/mod.rs +++ b/parquet/src/util/test_common/mod.rs @@ -22,6 +22,3 @@ pub mod file_util; #[cfg(test)] pub mod rand_gen; - -#[cfg(all(test, feature = "encryption", feature = "arrow"))] -pub mod encryption_util; diff --git a/parquet/tests/arrow_reader/encryption.rs b/parquet/tests/arrow_reader/encryption.rs new file mode 100644 index 000000000000..1d633f799177 --- /dev/null +++ b/parquet/tests/arrow_reader/encryption.rs @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module contains tests for reading encrypted Parquet files with the Arrow API + +use crate::encryption_util::verify_encryption_test_data; +use arrow_array::RecordBatch; +use parquet::arrow::arrow_reader::{ + ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +}; +use parquet::encryption::decrypt::FileDecryptionProperties; +use std::fs::File; + +#[test] +fn test_non_uniform_encryption_plaintext_footer() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); + let file = File::open(path).unwrap(); + + // There is always a footer key even with a plaintext footer, + // but this is used for signing the footer. 
+ let footer_key = "0123456789012345".as_bytes(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + verify_encryption_test_file_read(file, decryption_properties); +} + +#[test] +fn test_non_uniform_encryption_disabled_aad_storage() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = + format!("{test_data}/encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted"); + let file = File::open(path.clone()).unwrap(); + + let footer_key = "0123456789012345".as_bytes(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + + // Can read successfully when providing the correct AAD prefix + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .with_aad_prefix("tester".as_bytes().to_vec()) + .build() + .unwrap(); + + verify_encryption_test_file_read(file, decryption_properties); + + // Using wrong AAD prefix should fail + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .with_aad_prefix("wrong_aad_prefix".as_bytes().to_vec()) + .build() + .unwrap(); + + let file = File::open(path.clone()).unwrap(); + let options = ArrowReaderOptions::default() + .with_file_decryption_properties(decryption_properties.clone()); + let result = ArrowReaderMetadata::load(&file, options.clone()); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Parquet error: Provided footer key and AAD 
were unable to decrypt parquet footer" + ); + + // Not providing any AAD prefix should fail as it isn't stored in the file + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + let file = File::open(path).unwrap(); + let options = ArrowReaderOptions::default() + .with_file_decryption_properties(decryption_properties.clone()); + let result = ArrowReaderMetadata::load(&file, options.clone()); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Parquet error: Provided footer key and AAD were unable to decrypt parquet footer" + ); +} + +#[test] +#[cfg(feature = "snap")] +fn test_plaintext_footer_read_without_decryption() { + crate::encryption_agnostic::read_plaintext_footer_file_without_decryption_properties(); +} + +#[test] +fn test_non_uniform_encryption() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted"); + let file = File::open(path).unwrap(); + + let footer_key = "0123456789012345".as_bytes(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + verify_encryption_test_file_read(file, decryption_properties); +} + +#[test] +fn test_uniform_encryption() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); + let file = File::open(path).unwrap(); + + let key_code: &[u8] = "0123456789012345".as_bytes(); + let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) + .build() + 
.unwrap(); + + verify_encryption_test_file_read(file, decryption_properties); +} + +#[test] +fn test_decrypting_without_decryption_properties_fails() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); + let file = File::open(path).unwrap(); + + let options = ArrowReaderOptions::default(); + let result = ArrowReaderMetadata::load(&file, options.clone()); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Parquet error: Parquet file has an encrypted footer but no decryption properties were provided" + ); +} + +#[test] +fn test_aes_ctr_encryption() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_and_footer_ctr.parquet.encrypted"); + let file = File::open(path).unwrap(); + + let footer_key = "0123456789012345".as_bytes(); + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + let options = + ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); + let metadata = ArrowReaderMetadata::load(&file, options); + + match metadata { + Err(parquet::errors::ParquetError::NYI(s)) => { + assert!(s.contains("AES_GCM_CTR_V1")); + } + _ => { + panic!("Expected ParquetError::NYI"); + } + }; +} + +fn verify_encryption_test_file_read(file: File, decryption_properties: FileDecryptionProperties) { + let options = + ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); + let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); + let metadata = reader_metadata.metadata(); + + let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options).unwrap(); + let record_reader = builder.build().unwrap(); + let record_batches = record_reader + .map(|x| x.unwrap()) + .collect::>(); + + verify_encryption_test_data(record_batches, metadata); +} diff --git a/parquet/tests/arrow_reader/encryption_agnostic.rs b/parquet/tests/arrow_reader/encryption_agnostic.rs new file mode 100644 index 000000000000..e071471712f4 --- /dev/null +++ b/parquet/tests/arrow_reader/encryption_agnostic.rs @@ -0,0 +1,146 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Common tests for encryption related functionality + +use arrow_array::cast::AsArray; +use arrow_array::types; +use arrow_schema::ArrowError; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder}; +use parquet::arrow::ProjectionMask; +use std::fs::File; + +pub fn read_plaintext_footer_file_without_decryption_properties() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); + let file = File::open(&path).unwrap(); + + let metadata = ArrowReaderMetadata::load(&file, Default::default()).unwrap(); + let file_metadata = metadata.metadata().file_metadata(); + + assert_eq!(file_metadata.num_rows(), 50); + assert_eq!(file_metadata.schema_descr().num_columns(), 8); + assert_eq!( + file_metadata.created_by().unwrap(), + "parquet-cpp-arrow version 19.0.0-SNAPSHOT" + ); + + metadata.metadata().row_groups().iter().for_each(|rg| { + assert_eq!(rg.num_columns(), 8); + assert_eq!(rg.num_rows(), 50); + }); + + // Should be able to read unencrypted columns. Test reading one column. 
+ let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); + let mask = ProjectionMask::leaves(builder.parquet_schema(), [1]); + let record_reader = builder.with_projection(mask).build().unwrap(); + + let mut row_count = 0; + for batch in record_reader { + let batch = batch.unwrap(); + row_count += batch.num_rows(); + + let time_col = batch + .column(0) + .as_primitive::(); + for (i, x) in time_col.iter().enumerate() { + assert_eq!(x.unwrap(), i as i32); + } + } + + assert_eq!(row_count, file_metadata.num_rows() as usize); + + // Reading an encrypted column should fail + let file = File::open(&path).unwrap(); + let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); + let mask = ProjectionMask::leaves(builder.parquet_schema(), [4]); + let mut record_reader = builder.with_projection(mask).build().unwrap(); + + match record_reader.next() { + Some(Err(ArrowError::ParquetError(s))) => { + assert!(s.contains("protocol error")); + } + _ => { + panic!("Expected ArrowError::ParquetError"); + } + }; +} + +#[cfg(feature = "async")] +pub async fn read_plaintext_footer_file_without_decryption_properties_async() { + use futures::StreamExt; + use futures::TryStreamExt; + use parquet::arrow::ParquetRecordBatchStreamBuilder; + use parquet::errors::ParquetError; + + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); + let mut file = tokio::fs::File::open(&path).await.unwrap(); + + let metadata = ArrowReaderMetadata::load_async(&mut file, Default::default()) + .await + .unwrap(); + let file_metadata = metadata.metadata().file_metadata(); + + assert_eq!(file_metadata.num_rows(), 50); + assert_eq!(file_metadata.schema_descr().num_columns(), 8); + assert_eq!( + file_metadata.created_by().unwrap(), + "parquet-cpp-arrow version 19.0.0-SNAPSHOT" + ); + + metadata.metadata().row_groups().iter().for_each(|rg| { + assert_eq!(rg.num_columns(), 8); + 
assert_eq!(rg.num_rows(), 50); + }); + + // Should be able to read unencrypted columns. Test reading one column. + let builder = ParquetRecordBatchStreamBuilder::new(file).await.unwrap(); + let mask = ProjectionMask::leaves(builder.parquet_schema(), [1]); + let record_reader = builder.with_projection(mask).build().unwrap(); + let record_batches = record_reader.try_collect::>().await.unwrap(); + + let mut row_count = 0; + for batch in record_batches { + let batch = batch; + row_count += batch.num_rows(); + + let time_col = batch + .column(0) + .as_primitive::(); + for (i, x) in time_col.iter().enumerate() { + assert_eq!(x.unwrap(), i as i32); + } + } + + assert_eq!(row_count, file_metadata.num_rows() as usize); + + // Reading an encrypted column should fail + let file = tokio::fs::File::open(&path).await.unwrap(); + let builder = ParquetRecordBatchStreamBuilder::new(file).await.unwrap(); + let mask = ProjectionMask::leaves(builder.parquet_schema(), [4]); + let mut record_reader = builder.with_projection(mask).build().unwrap(); + + match record_reader.next().await { + Some(Err(ParquetError::ArrowError(s))) => { + assert!(s.contains("protocol error")); + } + _ => { + panic!("Expected ArrowError::ParquetError"); + } + }; +} diff --git a/parquet/tests/arrow_reader/encryption_async.rs b/parquet/tests/arrow_reader/encryption_async.rs new file mode 100644 index 000000000000..01ed0a9fc9cb --- /dev/null +++ b/parquet/tests/arrow_reader/encryption_async.rs @@ -0,0 +1,289 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module contains tests for reading encrypted Parquet files with the async Arrow API + +use crate::encryption_util::verify_encryption_test_data; +use futures::TryStreamExt; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; +use parquet::arrow::ParquetRecordBatchStreamBuilder; +use parquet::encryption::decrypt::FileDecryptionProperties; +use parquet::errors::ParquetError; +use tokio::fs::File; + +#[tokio::test] +async fn test_non_uniform_encryption_plaintext_footer() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); + let mut file = File::open(&path).await.unwrap(); + + // There is always a footer key even with a plaintext footer, + // but this is used for signing the footer. 
+ let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes().to_vec(); + let column_2_key = "1234567890123451".as_bytes().to_vec(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key) + .with_column_key("double_field", column_1_key) + .with_column_key("float_field", column_2_key) + .build() + .unwrap(); + + verify_encryption_test_file_read_async(&mut file, decryption_properties) + .await + .unwrap(); +} + +#[tokio::test] +async fn test_misspecified_encryption_keys() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted"); + + // There is always a footer key even with a plaintext footer, + // but this is used for signing the footer. + let footer_key = "0123456789012345".as_bytes(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + + // read file with keys and check for expected error message + async fn check_for_error( + expected_message: &str, + path: &String, + footer_key: &[u8], + column_1_key: &[u8], + column_2_key: &[u8], + ) { + let mut file = File::open(&path).await.unwrap(); + + let mut decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()); + + if !column_1_key.is_empty() { + decryption_properties = + decryption_properties.with_column_key("double_field", column_1_key.to_vec()); + } + + if !column_2_key.is_empty() { + decryption_properties = + decryption_properties.with_column_key("float_field", column_2_key.to_vec()); + } + + let decryption_properties = decryption_properties.build().unwrap(); + + match verify_encryption_test_file_read_async(&mut file, decryption_properties).await { + Ok(_) => { + panic!("did not get expected error") + } + Err(e) => { + assert_eq!(e.to_string(), expected_message); + } + } + } + + // Too short footer key + check_for_error( + "Parquet error: Invalid footer key. 
Failed to create AES key", + &path, + "bad_pwd".as_bytes(), + column_1_key, + column_2_key, + ) + .await; + + // Wrong footer key + check_for_error( + "Parquet error: Provided footer key and AAD were unable to decrypt parquet footer", + &path, + "1123456789012345".as_bytes(), + column_1_key, + column_2_key, + ) + .await; + + // Missing column key + check_for_error("Parquet error: Unable to decrypt column 'double_field', perhaps the column key is wrong or missing?", + &path, footer_key, "".as_bytes(), column_2_key).await; + + // Too short column key + check_for_error( + "Parquet error: Failed to create AES key", + &path, + footer_key, + "abc".as_bytes(), + column_2_key, + ) + .await; + + // Wrong column key + check_for_error("Parquet error: Unable to decrypt column 'double_field', perhaps the column key is wrong or missing?", + &path, footer_key, "1123456789012345".as_bytes(), column_2_key).await; + + // Mixed up keys + check_for_error("Parquet error: Unable to decrypt column 'float_field', perhaps the column key is wrong or missing?", + &path, footer_key, column_2_key, column_1_key).await; +} + +#[tokio::test] +#[cfg(feature = "snap")] +async fn test_plaintext_footer_read_without_decryption() { + crate::encryption_agnostic::read_plaintext_footer_file_without_decryption_properties_async() + .await; +} + +#[tokio::test] +async fn test_non_uniform_encryption() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted"); + let mut file = File::open(&path).await.unwrap(); + + let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes().to_vec(); + let column_2_key = "1234567890123451".as_bytes().to_vec(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key) + .with_column_key("float_field", column_2_key) + .build() + .unwrap(); + + 
verify_encryption_test_file_read_async(&mut file, decryption_properties) + .await + .unwrap(); +} + +#[tokio::test] +async fn test_uniform_encryption() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); + let mut file = File::open(&path).await.unwrap(); + + let key_code: &[u8] = "0123456789012345".as_bytes(); + let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) + .build() + .unwrap(); + + verify_encryption_test_file_read_async(&mut file, decryption_properties) + .await + .unwrap(); +} + +#[tokio::test] +async fn test_aes_ctr_encryption() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_and_footer_ctr.parquet.encrypted"); + let mut file = File::open(&path).await.unwrap(); + + let footer_key = "0123456789012345".as_bytes().to_vec(); + let column_1_key = "1234567890123450".as_bytes().to_vec(); + //let column_2_key = "1234567890123451".as_bytes().to_vec(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key) + .with_column_key("double_field", column_1_key.clone()) + .with_column_key("float_field", column_1_key) + .build() + .unwrap(); + + let options = ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties); + let metadata = ArrowReaderMetadata::load_async(&mut file, options).await; + + match metadata { + Err(ParquetError::NYI(s)) => { + assert!(s.contains("AES_GCM_CTR_V1")); + } + _ => { + panic!("Expected ParquetError::NYI"); + } + }; +} + +#[tokio::test] +async fn test_decrypting_without_decryption_properties_fails() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); + let mut file = File::open(&path).await.unwrap(); + + let options = ArrowReaderOptions::new(); + let result = ArrowReaderMetadata::load_async(&mut file, options).await; + 
assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Parquet error: Parquet file has an encrypted footer but no decryption properties were provided" + ); +} + +#[cfg(feature = "object_store")] +async fn get_encrypted_meta_store() -> ( + object_store::ObjectMeta, + std::sync::Arc, +) { + use object_store::local::LocalFileSystem; + use object_store::path::Path; + use object_store::ObjectStore; + + use std::sync::Arc; + let test_data = arrow::util::test_util::parquet_test_data(); + let store = LocalFileSystem::new_with_prefix(test_data).unwrap(); + + let meta = store + .head(&Path::from("uniform_encryption.parquet.encrypted")) + .await + .unwrap(); + + (meta, Arc::new(store) as Arc) +} + +#[tokio::test] +#[cfg(feature = "object_store")] +async fn test_read_encrypted_file_from_object_store() { + use parquet::arrow::async_reader::{AsyncFileReader, ParquetObjectReader}; + let (meta, store) = get_encrypted_meta_store().await; + + let key_code: &[u8] = "0123456789012345".as_bytes(); + let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) + .build() + .unwrap(); + let options = ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties); + + let mut reader = ParquetObjectReader::new(store, meta); + let metadata = reader.get_metadata_with_options(&options).await.unwrap(); + let builder = ParquetRecordBatchStreamBuilder::new_with_options(reader, options) + .await + .unwrap(); + let batch_stream = builder.build().unwrap(); + let record_batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + verify_encryption_test_data(record_batches, &metadata); +} + +async fn verify_encryption_test_file_read_async( + file: &mut tokio::fs::File, + decryption_properties: FileDecryptionProperties, +) -> Result<(), ParquetError> { + let options = ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties); + + let arrow_metadata = ArrowReaderMetadata::load_async(file, options).await?; + let 
metadata = arrow_metadata.metadata(); + + let record_reader = ParquetRecordBatchStreamBuilder::new_with_metadata( + file.try_clone().await?, + arrow_metadata.clone(), + ) + .build()?; + let record_batches = record_reader.try_collect::>().await?; + + verify_encryption_test_data(record_batches, metadata); + Ok(()) +} diff --git a/parquet/tests/arrow_reader/encryption_disabled.rs b/parquet/tests/arrow_reader/encryption_disabled.rs new file mode 100644 index 000000000000..8b38fd5e4ea4 --- /dev/null +++ b/parquet/tests/arrow_reader/encryption_disabled.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+//! Tests for reading encrypted Parquet files when the `encryption` feature is disabled
+
+use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
+use std::fs::File;
+
+/// Loading a file with an encrypted footer must fail with a message naming the disabled feature.
+#[test]
+fn test_read_without_encryption_enabled_fails() {
+    let test_data = arrow::util::test_util::parquet_test_data();
+    let path = format!("{test_data}/uniform_encryption.parquet.encrypted");
+    let file = File::open(path).unwrap();
+
+    let options = ArrowReaderOptions::new();
+    let result = ArrowReaderMetadata::load(&file, options);
+    assert!(result.is_err());
+    assert_eq!(
+        result.unwrap_err().to_string(),
+        "Parquet error: Parquet file has an encrypted footer but the encryption feature is disabled"
+    );
+}
+
+/// Async counterpart of `test_read_without_encryption_enabled_fails`.
+#[tokio::test]
+#[cfg(feature = "async")]
+async fn test_async_read_without_encryption_enabled_fails() {
+    let test_data = arrow::util::test_util::parquet_test_data();
+    let path = format!("{test_data}/uniform_encryption.parquet.encrypted");
+    let mut file = tokio::fs::File::open(&path).await.unwrap();
+
+    let options = ArrowReaderOptions::new();
+    let result = ArrowReaderMetadata::load_async(&mut file, options).await;
+    assert!(result.is_err());
+    assert_eq!(
+        result.unwrap_err().to_string(),
+        "Parquet error: Parquet file has an encrypted footer but the encryption feature is disabled"
+    );
+}
+
+/// Runs the shared plaintext-footer check through the synchronous reader.
+#[test]
+#[cfg(feature = "snap")]
+fn test_plaintext_footer_read_without_decryption() {
+    crate::encryption_agnostic::read_plaintext_footer_file_without_decryption_properties();
+}
+
+/// Runs the shared plaintext-footer check through the async reader.
+#[tokio::test]
+#[cfg(all(feature = "async", feature = "snap"))]
+async fn test_plaintext_footer_read_without_decryption_async() {
+    crate::encryption_agnostic::read_plaintext_footer_file_without_decryption_properties_async()
+        .await;
+}
diff --git a/parquet/src/util/test_common/encryption_util.rs b/parquet/tests/arrow_reader/encryption_util.rs
similarity index 59%
rename from parquet/src/util/test_common/encryption_util.rs
rename to parquet/tests/arrow_reader/encryption_util.rs
index 2f6e5bc45636..de21f13ca6d4 100644
--- 
a/parquet/src/util/test_common/encryption_util.rs +++ b/parquet/tests/arrow_reader/encryption_util.rs @@ -15,67 +15,17 @@ // specific language governing permissions and limitations // under the License. -use crate::arrow::arrow_reader::{ - ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, -}; -use crate::arrow::ParquetRecordBatchStreamBuilder; -use crate::encryption::decrypt::FileDecryptionProperties; -use crate::errors::ParquetError; -use crate::file::metadata::FileMetaData; use arrow_array::cast::AsArray; use arrow_array::{types, RecordBatch}; -use futures::TryStreamExt; -use std::fs::File; +use parquet::file::metadata::ParquetMetaData; -pub(crate) fn verify_encryption_test_file_read( - file: File, - decryption_properties: FileDecryptionProperties, -) { - let options = ArrowReaderOptions::default() - .with_file_decryption_properties(decryption_properties.clone()); - let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); - let file_metadata = metadata.metadata.file_metadata(); - - let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); - let record_reader = builder.build().unwrap(); - let record_batches = record_reader - .map(|x| x.unwrap()) - .collect::>(); - - verify_encryption_test_data(record_batches, file_metadata.clone(), metadata); -} - -pub(crate) async fn verify_encryption_test_file_read_async( - file: &mut tokio::fs::File, - decryption_properties: FileDecryptionProperties, -) -> Result<(), ParquetError> { - let options = ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties); - - let metadata = ArrowReaderMetadata::load_async(file, options.clone()).await?; - let arrow_reader_metadata = ArrowReaderMetadata::load_async(file, options).await?; - let file_metadata = metadata.metadata.file_metadata(); - - let record_reader = ParquetRecordBatchStreamBuilder::new_with_metadata( - file.try_clone().await?, - arrow_reader_metadata.clone(), - ) - .build()?; - 
let record_batches = record_reader.try_collect::>().await?; - - verify_encryption_test_data(record_batches, file_metadata.clone(), metadata); - Ok(()) -} - -/// Tests reading an encrypted file from the parquet-testing repository -fn verify_encryption_test_data( - record_batches: Vec, - file_metadata: FileMetaData, - metadata: ArrowReaderMetadata, -) { +/// Verifies data read from an encrypted file from the parquet-testing repository +pub fn verify_encryption_test_data(record_batches: Vec, metadata: &ParquetMetaData) { + let file_metadata = metadata.file_metadata(); assert_eq!(file_metadata.num_rows(), 50); assert_eq!(file_metadata.schema_descr().num_columns(), 8); - metadata.metadata.row_groups().iter().for_each(|rg| { + metadata.row_groups().iter().for_each(|rg| { assert_eq!(rg.num_columns(), 8); assert_eq!(rg.num_rows(), 50); }); diff --git a/parquet/tests/arrow_reader/mod.rs b/parquet/tests/arrow_reader/mod.rs index 0e6783583cd5..93b63921d333 100644 --- a/parquet/tests/arrow_reader/mod.rs +++ b/parquet/tests/arrow_reader/mod.rs @@ -38,6 +38,15 @@ use tempfile::NamedTempFile; mod bad_data; #[cfg(feature = "crc")] mod checksum; +#[cfg(feature = "encryption")] +mod encryption; +mod encryption_agnostic; +#[cfg(all(feature = "encryption", feature = "async"))] +mod encryption_async; +#[cfg(not(feature = "encryption"))] +mod encryption_disabled; +#[cfg(feature = "encryption")] +mod encryption_util; mod statistics; // returns a struct array with columns "int32_col", "float32_col" and "float64_col" with the specified values