Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 0 additions & 240 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,6 @@ mod tests {
};
use arrow_select::concat::concat_batches;

use crate::arrow::arrow_reader::ArrowReaderMetadata;
use crate::arrow::arrow_reader::{
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector,
Expand All @@ -1018,15 +1017,11 @@ mod tests {
BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
FloatType, Int32Type, Int64Type, Int96Type,
};
#[cfg(feature = "encryption")]
use crate::encryption::decrypt::FileDecryptionProperties;
use crate::errors::Result;
use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
use crate::file::writer::SerializedFileWriter;
use crate::schema::parser::parse_message_type;
use crate::schema::types::{Type, TypePtr};
#[cfg(feature = "encryption")]
use crate::util::test_common::encryption_util::verify_encryption_test_file_read;
use crate::util::test_common::rand_gen::RandGen;

#[test]
Expand Down Expand Up @@ -1855,241 +1850,6 @@ mod tests {
assert!(col.value(2).is_nan());
}

/// Reads `encrypt_columns_plaintext_footer.parquet.encrypted` using a footer key plus
/// separate keys for `double_field` and `float_field`, and passes the file and the
/// decryption properties to `verify_encryption_test_file_read`.
#[test]
#[cfg(feature = "encryption")]
fn test_non_uniform_encryption_plaintext_footer() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let encrypted_file = File::open(format!(
        "{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"
    ))
    .unwrap();

    // There is always a footer key even with a plaintext footer,
    // but this is used for signing the footer.
    let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16
    let double_field_key = "1234567890123450".as_bytes().to_vec();
    let float_field_key = "1234567890123451".as_bytes().to_vec();

    let builder = FileDecryptionProperties::builder(footer_key)
        .with_column_key("double_field", double_field_key)
        .with_column_key("float_field", float_field_key);
    let decryption_properties = builder.build().unwrap();

    verify_encryption_test_file_read(encrypted_file, decryption_properties);
}

/// Exercises a file whose AAD prefix is not stored alongside the data
/// (`encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted`):
///
/// * with the AAD prefix `tester` the file is handed to
///   `verify_encryption_test_file_read`;
/// * with a wrong AAD prefix, or with none at all, loading the metadata must
///   fail with the "unable to decrypt parquet footer" error.
#[test]
#[cfg(feature = "encryption")]
fn test_non_uniform_encryption_disabled_aad_storage() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let path =
        format!("{testdata}/encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted");

    let footer_key = "0123456789012345".as_bytes(); // 128bit/16
    let column_1_key = "1234567890123450".as_bytes();
    let column_2_key = "1234567890123451".as_bytes();

    // Shared check for the two failure scenarios: loading the metadata with the
    // given properties has to be rejected with the footer-decryption error.
    // The file is re-opened by borrowing `path`, so no clone of the path is needed.
    let assert_footer_decryption_fails = |decryption_properties: FileDecryptionProperties| {
        let file = File::open(&path).unwrap();
        let options =
            ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
        let err = ArrowReaderMetadata::load(&file, options)
            .expect_err("loading metadata should fail without the correct AAD prefix");
        assert_eq!(
            err.to_string(),
            "Parquet error: Provided footer key and AAD were unable to decrypt parquet footer"
        );
    };

    // Can read successfully when providing the correct AAD prefix
    let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
        .with_column_key("double_field", column_1_key.to_vec())
        .with_column_key("float_field", column_2_key.to_vec())
        .with_aad_prefix("tester".as_bytes().to_vec())
        .build()
        .unwrap();

    let file = File::open(&path).unwrap();
    verify_encryption_test_file_read(file, decryption_properties);

    // Using wrong AAD prefix should fail
    let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
        .with_column_key("double_field", column_1_key.to_vec())
        .with_column_key("float_field", column_2_key.to_vec())
        .with_aad_prefix("wrong_aad_prefix".as_bytes().to_vec())
        .build()
        .unwrap();
    assert_footer_decryption_fails(decryption_properties);

    // Not providing any AAD prefix should fail as it isn't stored in the file
    let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
        .with_column_key("double_field", column_1_key.to_vec())
        .with_column_key("float_field", column_2_key.to_vec())
        .build()
        .unwrap();
    assert_footer_decryption_fails(decryption_properties);
}

/// Opens `encrypt_columns_plaintext_footer.parquet.encrypted` without any decryption
/// properties and checks that:
///
/// * the file metadata and row-group metadata are readable (50 rows, 8 columns);
/// * leaf column 1 can be read and holds the values `0, 1, 2, ...` per batch;
/// * reading leaf column 4 yields a `ParquetError` mentioning "protocol error".
#[test]
fn test_non_uniform_encryption_plaintext_footer_without_decryption() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
    let file = File::open(&path).unwrap();

    let metadata = ArrowReaderMetadata::load(&file, Default::default()).unwrap();
    let file_metadata = metadata.metadata.file_metadata();

    assert_eq!(file_metadata.num_rows(), 50);
    assert_eq!(file_metadata.schema_descr().num_columns(), 8);
    assert_eq!(
        file_metadata.created_by().unwrap(),
        "parquet-cpp-arrow version 19.0.0-SNAPSHOT"
    );

    for row_group in metadata.metadata.row_groups() {
        assert_eq!(row_group.num_columns(), 8);
        assert_eq!(row_group.num_rows(), 50);
    }

    // Should be able to read unencrypted columns. Test reading one column.
    let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
    let projection = ProjectionMask::leaves(builder.parquet_schema(), [1]);
    let record_reader = builder.with_projection(projection).build().unwrap();

    // Validate every batch while accumulating the total number of rows read.
    let row_count: usize = record_reader
        .map(|batch| {
            let batch = batch.unwrap();
            let time_col = batch
                .column(0)
                .as_primitive::<types::Time32MillisecondType>();
            time_col
                .iter()
                .enumerate()
                .for_each(|(i, x)| assert_eq!(x.unwrap(), i as i32));
            batch.num_rows()
        })
        .sum();

    assert_eq!(row_count, file_metadata.num_rows() as usize);

    // Reading an encrypted column should fail
    let file = File::open(&path).unwrap();
    let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
    let projection = ProjectionMask::leaves(builder.parquet_schema(), [4]);
    let mut record_reader = builder.with_projection(projection).build().unwrap();

    if let Some(Err(ArrowError::ParquetError(message))) = record_reader.next() {
        assert!(message.contains("protocol error"));
    } else {
        panic!("Expected ArrowError::ParquetError");
    }
}

/// Reads `encrypt_columns_and_footer.parquet.encrypted` with a footer key and
/// dedicated keys for `double_field` and `float_field`, delegating the actual
/// checks to `verify_encryption_test_file_read`.
#[test]
#[cfg(feature = "encryption")]
fn test_non_uniform_encryption() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let encrypted_file = File::open(format!(
        "{testdata}/encrypt_columns_and_footer.parquet.encrypted"
    ))
    .unwrap();

    // All keys are 16 bytes long (128 bit).
    let footer_key = "0123456789012345".as_bytes().to_vec();
    let double_field_key = "1234567890123450".as_bytes().to_vec();
    let float_field_key = "1234567890123451".as_bytes().to_vec();

    let builder = FileDecryptionProperties::builder(footer_key)
        .with_column_key("double_field", double_field_key)
        .with_column_key("float_field", float_field_key);
    let decryption_properties = builder.build().unwrap();

    verify_encryption_test_file_read(encrypted_file, decryption_properties);
}

/// Reads `uniform_encryption.parquet.encrypted` with a single key and no per-column
/// keys, delegating the actual checks to `verify_encryption_test_file_read`.
#[test]
#[cfg(feature = "encryption")]
fn test_uniform_encryption() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let encrypted_file =
        File::open(format!("{testdata}/uniform_encryption.parquet.encrypted")).unwrap();

    // The only key supplied is the one passed to the builder.
    let key_code: Vec<u8> = "0123456789012345".as_bytes().to_vec();
    let builder = FileDecryptionProperties::builder(key_code);
    let decryption_properties = builder.build().unwrap();

    verify_encryption_test_file_read(encrypted_file, decryption_properties);
}

/// With the `encryption` feature disabled, loading the metadata of
/// `uniform_encryption.parquet.encrypted` must fail with an error stating that the
/// footer is encrypted but the feature is turned off.
#[test]
#[cfg(not(feature = "encryption"))]
fn test_decrypting_without_encryption_flag_fails() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
    let file = File::open(path).unwrap();

    // `load` takes the options by value and they are not reused, so no clone is needed.
    let options = ArrowReaderOptions::default();
    let err = ArrowReaderMetadata::load(&file, options)
        .expect_err("loading an encrypted footer should fail without the encryption feature");
    assert_eq!(
        err.to_string(),
        "Parquet error: Parquet file has an encrypted footer but the encryption feature is disabled"
    );
}

/// With the `encryption` feature enabled but default reader options (no decryption
/// properties set), loading the metadata of `uniform_encryption.parquet.encrypted`
/// must fail with an error stating that no decryption properties were provided.
#[test]
#[cfg(feature = "encryption")]
fn test_decrypting_without_decryption_properties_fails() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
    let file = File::open(path).unwrap();

    // `load` takes the options by value and they are not reused, so no clone is needed.
    let options = ArrowReaderOptions::default();
    let err = ArrowReaderMetadata::load(&file, options)
        .expect_err("loading an encrypted footer should fail without decryption properties");
    assert_eq!(
        err.to_string(),
        "Parquet error: Parquet file has an encrypted footer but no decryption properties were provided"
    );
}

/// Loading the metadata of `encrypt_columns_and_footer_ctr.parquet.encrypted` with
/// footer and column keys is expected to return `ParquetError::NYI` whose message
/// mentions `AES_GCM_CTR_V1`.
#[test]
#[cfg(feature = "encryption")]
fn test_aes_ctr_encryption() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let encrypted_file = File::open(format!(
        "{testdata}/encrypt_columns_and_footer_ctr.parquet.encrypted"
    ))
    .unwrap();

    let footer_key = "0123456789012345".as_bytes().to_vec();
    let double_field_key = "1234567890123450".as_bytes().to_vec();
    let float_field_key = "1234567890123451".as_bytes().to_vec();

    let decryption_properties = FileDecryptionProperties::builder(footer_key)
        .with_column_key("double_field", double_field_key)
        .with_column_key("float_field", float_field_key)
        .build()
        .unwrap();

    let load_result = ArrowReaderMetadata::load(
        &encrypted_file,
        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties),
    );

    // Any outcome other than the NYI error variant is a test failure.
    if let Err(crate::errors::ParquetError::NYI(message)) = load_result {
        assert!(message.contains("AES_GCM_CTR_V1"));
    } else {
        panic!("Expected ParquetError::NYI");
    }
}

#[test]
fn test_read_float32_float64_byte_stream_split() {
let path = format!(
Expand Down
Loading
Loading