From 5b8e4a9180480b5531720e79018f52f17efaedd1 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 29 Apr 2025 00:27:47 +0200 Subject: [PATCH 01/23] Initial commit --- parquet/src/encryption/ciphers.rs | 20 +++++++-- parquet/src/encryption/decrypt.rs | 56 +++++++++++++++++++++++++- parquet/src/file/metadata/reader.rs | 16 ++++++-- parquet/tests/encryption/encryption.rs | 13 +++++- 4 files changed, 95 insertions(+), 10 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index 18a6f5776d6b..a9c2f269e421 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -23,12 +23,14 @@ use ring::rand::{SecureRandom, SystemRandom}; use std::fmt::Debug; const RIGHT_TWELVE: u128 = 0x0000_0000_ffff_ffff_ffff_ffff_ffff_ffff; -const NONCE_LEN: usize = 12; -const TAG_LEN: usize = 16; -const SIZE_LEN: usize = 4; +pub(crate) const NONCE_LEN: usize = 12; +pub(crate) const TAG_LEN: usize = 16; +pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; + + fn compute_tag(&self, nonce: &[u8], aad: &[u8], ciphertext: &mut [u8]) -> Result>; } #[derive(Debug, Clone)] @@ -63,6 +65,18 @@ impl BlockDecryptor for RingGcmBlockDecryptor { result.resize(result.len() - TAG_LEN, 0u8); Ok(result) } + + fn compute_tag(&self, nonce: &[u8], aad: &[u8], plaintext: &mut [u8]) -> Result> { + let nonce = ring::aead::Nonce::try_assume_unique_for_key( + &nonce, + )?; + let tag = self.key.seal_in_place_separate_tag( + nonce, + Aad::from(aad), + plaintext, + )?; + Ok(tag.as_ref().to_vec()) + } } pub(crate) trait BlockEncryptor: Debug + Send + Sync { diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 6a51f1a6570e..70a40ed4a214 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -17,8 +17,8 @@ //! Configuration and utilities for decryption of files using Parquet Modular Encryption -use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor}; -use crate::encryption::modules::{create_module_aad, ModuleType}; +use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor, NONCE_LEN, TAG_LEN, SIZE_LEN}; +use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::ColumnCryptoMetaData; use std::borrow::Cow; @@ -26,6 +26,9 @@ use std::collections::HashMap; use std::fmt::Formatter; use std::io::Read; use std::sync::Arc; +use thrift::protocol::TCompactOutputProtocol; +use crate::thrift::TSerializable; +use crate::format::{FileMetaData as TFileMetaData}; /// Trait for retrieving an encryption key using the key's metadata /// @@ -331,6 +334,7 @@ impl PartialEq for DecryptionKeys { pub struct FileDecryptionProperties { keys: DecryptionKeys, aad_prefix: Option>, + footer_signature_verification: bool, } impl FileDecryptionProperties { @@ -351,6 +355,11 @@ impl FileDecryptionProperties { self.aad_prefix.as_ref() } + /// Returns true if footer signature verification is enabled. + pub fn check_plaintext_footer_integrity(&self) -> bool { + self.footer_signature_verification + } + /// Get the encryption key for decrypting a file's footer, /// and also column data if uniform encryption is used. pub fn footer_key(&self, key_metadata: Option<&[u8]>) -> Result>> { @@ -415,6 +424,7 @@ pub struct DecryptionPropertiesBuilder { key_retriever: Option>, column_keys: HashMap>, aad_prefix: Option>, + footer_signature_verification: bool, } impl DecryptionPropertiesBuilder { @@ -426,6 +436,7 @@ impl DecryptionPropertiesBuilder { key_retriever: None, column_keys: HashMap::default(), aad_prefix: None, + footer_signature_verification: true, } } @@ -439,6 +450,7 @@ impl DecryptionPropertiesBuilder { key_retriever: Some(key_retriever), column_keys: HashMap::default(), aad_prefix: None, + footer_signature_verification: true, } } @@ -464,6 +476,7 @@ impl DecryptionPropertiesBuilder { Ok(FileDecryptionProperties { keys, aad_prefix: self.aad_prefix, + footer_signature_verification: self.footer_signature_verification, }) } @@ -496,6 +509,13 @@ impl DecryptionPropertiesBuilder { } Ok(self) } + + /// Specify if footer signature verification should be disabled. + /// Signature verification is enabled by default. + pub fn disable_footer_signature_verification(mut self) -> Self { + self.footer_signature_verification = false; + self + } } #[derive(Clone, Debug)] @@ -538,6 +558,38 @@ impl FileDecryptor { Ok(self.footer_decryptor.clone()) } + /// Verify the signature of the footer + pub(crate) fn verify_signature(&self, file_metadata: &TFileMetaData, buf: &[u8]) -> Result<()> { + let mut plaintext: Vec = vec![]; + { + let mut unencrypted_protocol = TCompactOutputProtocol::new(&mut plaintext); + file_metadata.write_to_out_protocol(&mut unencrypted_protocol)?; + } + + // Format is: [ciphertext size, nonce, ciphertext, authentication tag] + let nonce = &buf[SIZE_LEN..SIZE_LEN + NONCE_LEN]; + let tag = &buf[buf.len() - TAG_LEN..]; + + let aad = create_footer_aad(self.file_aad())?; + // let aad = self.file_aad(); + let footer_decryptor = self.get_footer_decryptor()?; + + let computed_tag = footer_decryptor.compute_tag( + nonce, + aad.as_ref(), + &mut plaintext, + )?; + + if computed_tag != tag { + return Err(general_err!( + "Footer signature verification failed. Computed: {:?}, Expected: {:?}", + computed_tag, + tag + )); + } + Ok(()) + } + pub(crate) fn get_column_data_decryptor( &self, column_name: &str, diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 00b6b2d4f545..9bf9d02e3ebc 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -17,14 +17,13 @@ use std::{io::Read, ops::Range, sync::Arc}; -use bytes::Bytes; - use crate::basic::ColumnOrder; #[cfg(feature = "encryption")] use crate::encryption::{ decrypt::{FileDecryptionProperties, FileDecryptor}, modules::create_footer_aad, }; +use bytes::Bytes; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData}; @@ -963,15 +962,24 @@ impl ParquetMetaDataReader { let schema_descr = Arc::new(SchemaDescriptor::new(schema)); if let (Some(algo), Some(file_decryption_properties)) = ( - t_file_metadata.encryption_algorithm, + t_file_metadata.encryption_algorithm.clone(), file_decryption_properties, ) { // File has a plaintext footer but encryption algorithm is set file_decryptor = Some(get_file_decryptor( algo, - t_file_metadata.footer_signing_key_metadata.as_deref(), + t_file_metadata + .footer_signing_key_metadata + .clone() + .as_deref(), file_decryption_properties, )?); + if file_decryption_properties.check_plaintext_footer_integrity() { + file_decryptor + .clone() + .unwrap() + .verify_signature(&t_file_metadata, buf.as_ref())?; + } } let mut row_groups = Vec::new(); diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index 664850e507da..5bd8923d014c 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -43,7 +43,7 @@ use std::sync::Arc; fn test_non_uniform_encryption_plaintext_footer() { let test_data = arrow::util::test_util::parquet_test_data(); let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); - let file = File::open(path).unwrap(); + let file = File::open(path.clone()).unwrap(); // There is always a footer key even with a plaintext footer, // but this is used for signing the footer. @@ -51,6 +51,17 @@ fn test_non_uniform_encryption_plaintext_footer() { let column_1_key = "1234567890123450".as_bytes(); let column_2_key = "1234567890123451".as_bytes(); + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .disable_footer_signature_verification() + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + verify_encryption_test_file_read(file, decryption_properties); + + let file = File::open(path.clone()).unwrap(); + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) .with_column_key("double_field", column_1_key.to_vec()) .with_column_key("float_field", column_2_key.to_vec()) From 3479dcaa75b654ac8fd6da438c98ea31fe1b4d56 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 30 Apr 2025 00:01:53 +0200 Subject: [PATCH 02/23] Lint and clippy --- parquet/src/encryption/ciphers.rs | 12 ++++-------- parquet/src/encryption/decrypt.rs | 14 ++++++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index a9c2f269e421..b0d71905ad6c 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -67,14 +67,10 @@ impl BlockDecryptor for RingGcmBlockDecryptor { } fn compute_tag(&self, nonce: &[u8], aad: &[u8], plaintext: &mut [u8]) -> Result> { - let nonce = ring::aead::Nonce::try_assume_unique_for_key( - &nonce, - )?; - let tag = self.key.seal_in_place_separate_tag( - nonce, - Aad::from(aad), - plaintext, - )?; + let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; + let tag = self + .key + .seal_in_place_separate_tag(nonce, Aad::from(aad), plaintext)?; Ok(tag.as_ref().to_vec()) } } diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 70a40ed4a214..7185a6a1a4ac 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -17,18 +17,20 @@ //! Configuration and utilities for decryption of files using Parquet Modular Encryption -use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor, NONCE_LEN, TAG_LEN, SIZE_LEN}; +use crate::encryption::ciphers::{ + BlockDecryptor, RingGcmBlockDecryptor, NONCE_LEN, SIZE_LEN, TAG_LEN, +}; use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::ColumnCryptoMetaData; +use crate::format::FileMetaData as TFileMetaData; +use crate::thrift::TSerializable; use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Formatter; use std::io::Read; use std::sync::Arc; use thrift::protocol::TCompactOutputProtocol; -use crate::thrift::TSerializable; -use crate::format::{FileMetaData as TFileMetaData}; /// Trait for retrieving an encryption key using the key's metadata /// @@ -574,11 +576,7 @@ impl FileDecryptor { // let aad = self.file_aad(); let footer_decryptor = self.get_footer_decryptor()?; - let computed_tag = footer_decryptor.compute_tag( - nonce, - aad.as_ref(), - &mut plaintext, - )?; + let computed_tag = footer_decryptor.compute_tag(nonce, aad.as_ref(), &mut plaintext)?; if computed_tag != tag { return Err(general_err!( From 5a019d66ad76710b1add2deaf33a19c473fe9e9b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 16:28:19 +0200 Subject: [PATCH 03/23] Plaintext layout is different to encrypted one --- parquet/src/encryption/ciphers.rs | 9 ++++++--- parquet/src/encryption/decrypt.rs | 22 +++++----------------- parquet/src/file/metadata/reader.rs | 3 ++- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index b0d71905ad6c..db51710f369a 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -30,7 +30,7 @@ pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; - fn compute_tag(&self, nonce: &[u8], aad: &[u8], ciphertext: &mut [u8]) -> Result>; + fn compute_plaintext_footer_tag(&self, aad: &[u8], plaintext_footer: &mut [u8]) -> Result>; } #[derive(Debug, Clone)] @@ -66,11 +66,14 @@ impl BlockDecryptor for RingGcmBlockDecryptor { Ok(result) } - fn compute_tag(&self, nonce: &[u8], aad: &[u8], plaintext: &mut [u8]) -> Result> { + fn compute_plaintext_footer_tag(&self, aad: &[u8], plaintext_footer: &mut [u8]) -> Result> { + // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] + let nonce = &plaintext_footer[plaintext_footer.len() - NONCE_LEN - TAG_LEN..plaintext_footer.len() - TAG_LEN]; let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; + let plaintext_end = plaintext_footer.len() - NONCE_LEN - TAG_LEN; let tag = self .key - .seal_in_place_separate_tag(nonce, Aad::from(aad), plaintext)?; + .seal_in_place_separate_tag(nonce, Aad::from(aad), &mut plaintext_footer[..plaintext_end])?; Ok(tag.as_ref().to_vec()) } } diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 7185a6a1a4ac..9990d921bec4 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -18,19 +18,16 @@ //! Configuration and utilities for decryption of files using Parquet Modular Encryption use crate::encryption::ciphers::{ - BlockDecryptor, RingGcmBlockDecryptor, NONCE_LEN, SIZE_LEN, TAG_LEN, + BlockDecryptor, RingGcmBlockDecryptor, TAG_LEN, }; use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::ColumnCryptoMetaData; -use crate::format::FileMetaData as TFileMetaData; -use crate::thrift::TSerializable; use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Formatter; use std::io::Read; use std::sync::Arc; -use thrift::protocol::TCompactOutputProtocol; /// Trait for retrieving an encryption key using the key's metadata /// @@ -561,22 +558,13 @@ impl FileDecryptor { } /// Verify the signature of the footer - pub(crate) fn verify_signature(&self, file_metadata: &TFileMetaData, buf: &[u8]) -> Result<()> { - let mut plaintext: Vec = vec![]; - { - let mut unencrypted_protocol = TCompactOutputProtocol::new(&mut plaintext); - file_metadata.write_to_out_protocol(&mut unencrypted_protocol)?; - } - - // Format is: [ciphertext size, nonce, ciphertext, authentication tag] - let nonce = &buf[SIZE_LEN..SIZE_LEN + NONCE_LEN]; - let tag = &buf[buf.len() - TAG_LEN..]; - + pub(crate) fn verify_plaintext_footer_signature(&self, plaintext_footer: &mut [u8]) -> Result<()> { + // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] + let tag = plaintext_footer[plaintext_footer.len() - TAG_LEN..].to_vec(); let aad = create_footer_aad(self.file_aad())?; - // let aad = self.file_aad(); let footer_decryptor = self.get_footer_decryptor()?; - let computed_tag = footer_decryptor.compute_tag(nonce, aad.as_ref(), &mut plaintext)?; + let computed_tag = footer_decryptor.compute_plaintext_footer_tag(&aad, plaintext_footer)?; if computed_tag != tag { return Err(general_err!( diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 9bf9d02e3ebc..ff307b985291 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -975,10 +975,11 @@ impl ParquetMetaDataReader { file_decryption_properties, )?); if file_decryption_properties.check_plaintext_footer_integrity() { + let mut cpy_buf = buf.to_vec(); file_decryptor .clone() .unwrap() - .verify_signature(&t_file_metadata, buf.as_ref())?; + .verify_plaintext_footer_signature(cpy_buf.as_mut())?; } } From 551fa0e3c2558cfcd3acad135c95282e8c54ac2d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 16:40:58 +0200 Subject: [PATCH 04/23] Lint and expected memory size at decryption --- parquet/src/encryption/ciphers.rs | 23 +++++++++++++++++------ parquet/src/encryption/decrypt.rs | 9 +++++---- parquet/src/file/metadata/mod.rs | 4 ++-- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index db51710f369a..588cb3d5c854 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -30,7 +30,11 @@ pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; - fn compute_plaintext_footer_tag(&self, aad: &[u8], plaintext_footer: &mut [u8]) -> Result>; + fn compute_plaintext_footer_tag( + &self, + aad: &[u8], + plaintext_footer: &mut [u8], + ) -> Result>; } #[derive(Debug, Clone)] @@ -66,14 +70,21 @@ impl BlockDecryptor for RingGcmBlockDecryptor { Ok(result) } - fn compute_plaintext_footer_tag(&self, aad: &[u8], plaintext_footer: &mut [u8]) -> Result> { + fn compute_plaintext_footer_tag( + &self, + aad: &[u8], + plaintext_footer: &mut [u8], + ) -> Result> { // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] - let nonce = &plaintext_footer[plaintext_footer.len() - NONCE_LEN - TAG_LEN..plaintext_footer.len() - TAG_LEN]; + let nonce = &plaintext_footer + [plaintext_footer.len() - NONCE_LEN - TAG_LEN..plaintext_footer.len() - TAG_LEN]; let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; let plaintext_end = plaintext_footer.len() - NONCE_LEN - TAG_LEN; - let tag = self - .key - .seal_in_place_separate_tag(nonce, Aad::from(aad), &mut plaintext_footer[..plaintext_end])?; + let tag = self.key.seal_in_place_separate_tag( + nonce, + Aad::from(aad), + &mut plaintext_footer[..plaintext_end], + )?; Ok(tag.as_ref().to_vec()) } } diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 9990d921bec4..b0932db6933c 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -17,9 +17,7 @@ //! Configuration and utilities for decryption of files using Parquet Modular Encryption -use crate::encryption::ciphers::{ - BlockDecryptor, RingGcmBlockDecryptor, TAG_LEN, -}; +use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor, TAG_LEN}; use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::ColumnCryptoMetaData; @@ -558,7 +556,10 @@ impl FileDecryptor { } /// Verify the signature of the footer - pub(crate) fn verify_plaintext_footer_signature(&self, plaintext_footer: &mut [u8]) -> Result<()> { + pub(crate) fn verify_plaintext_footer_signature( + &self, + plaintext_footer: &mut [u8], + ) -> Result<()> { // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] let tag = plaintext_footer[plaintext_footer.len() - TAG_LEN..].to_vec(); let aad = create_footer_aad(self.file_aad())?; diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index e496cae4dead..d5877aa4566a 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -1999,7 +1999,7 @@ mod tests { #[cfg(not(feature = "encryption"))] let base_expected_size = 2312; #[cfg(feature = "encryption")] - let base_expected_size = 2640; + let base_expected_size = 2648; assert_eq!(parquet_meta.memory_size(), base_expected_size); @@ -2029,7 +2029,7 @@ mod tests { #[cfg(not(feature = "encryption"))] let bigger_expected_size = 2816; #[cfg(feature = "encryption")] - let bigger_expected_size = 3144; + let bigger_expected_size = 3152; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size); From 63d62ff0bb6aa231b9a6432994f4bf3a6708077a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 16:42:49 +0200 Subject: [PATCH 05/23] Apply suggestions from code review Co-authored-by: Adam Reeve --- parquet/src/encryption/decrypt.rs | 4 ++-- parquet/src/file/metadata/reader.rs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index b0932db6933c..7966e84b962e 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -352,7 +352,7 @@ impl FileDecryptionProperties { self.aad_prefix.as_ref() } - /// Returns true if footer signature verification is enabled. + /// Returns true if footer signature verification is enabled for files with plaintext footers. pub fn check_plaintext_footer_integrity(&self) -> bool { self.footer_signature_verification } @@ -507,7 +507,7 @@ impl DecryptionPropertiesBuilder { Ok(self) } - /// Specify if footer signature verification should be disabled. + /// Disable verification of footer tags for files that use plaintext footers. /// Signature verification is enabled by default. pub fn disable_footer_signature_verification(mut self) -> Self { self.footer_signature_verification = false; diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index ff307b985291..4868252b59f0 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -975,11 +975,10 @@ impl ParquetMetaDataReader { file_decryption_properties, )?); if file_decryption_properties.check_plaintext_footer_integrity() { - let mut cpy_buf = buf.to_vec(); file_decryptor .clone() .unwrap() - .verify_plaintext_footer_signature(cpy_buf.as_mut())?; + .verify_plaintext_footer_signature(buf.to_vec().as_mut())?; } } From a00efecc1399faad421227132e4ca121ffe7cfff Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 May 2025 00:22:24 +0200 Subject: [PATCH 06/23] Review feedback --- parquet/src/encryption/ciphers.rs | 17 +++++++------- parquet/src/encryption/decrypt.rs | 2 +- parquet/src/file/metadata/reader.rs | 19 ++++++---------- parquet/tests/encryption/encryption.rs | 31 ++++++++++++++++++++++++-- 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index 588cb3d5c854..9a3250dc4a28 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -30,10 +30,10 @@ pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; - fn compute_plaintext_footer_tag( + fn compute_plaintext_tag( &self, aad: &[u8], - plaintext_footer: &mut [u8], + plaintext: &mut [u8], ) -> Result>; } @@ -70,20 +70,19 @@ impl BlockDecryptor for RingGcmBlockDecryptor { Ok(result) } - fn compute_plaintext_footer_tag( + fn compute_plaintext_tag( &self, aad: &[u8], - plaintext_footer: &mut [u8], + plaintext: &mut [u8], ) -> Result> { - // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] - let nonce = &plaintext_footer - [plaintext_footer.len() - NONCE_LEN - TAG_LEN..plaintext_footer.len() - TAG_LEN]; + let nonce = &plaintext + [plaintext.len() - NONCE_LEN - TAG_LEN..plaintext.len() - TAG_LEN]; let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; - let plaintext_end = plaintext_footer.len() - NONCE_LEN - TAG_LEN; + let plaintext_end = plaintext.len() - NONCE_LEN - TAG_LEN; let tag = self.key.seal_in_place_separate_tag( nonce, Aad::from(aad), - &mut plaintext_footer[..plaintext_end], + &mut plaintext[..plaintext_end], )?; Ok(tag.as_ref().to_vec()) } diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 7966e84b962e..0b592f1d186b 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -565,7 +565,7 @@ impl FileDecryptor { let aad = create_footer_aad(self.file_aad())?; let footer_decryptor = self.get_footer_decryptor()?; - let computed_tag = footer_decryptor.compute_plaintext_footer_tag(&aad, plaintext_footer)?; + let computed_tag = footer_decryptor.compute_plaintext_tag(&aad, plaintext_footer)?; if computed_tag != tag { return Err(general_err!( diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 4868252b59f0..62c686bdac69 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -962,24 +962,19 @@ impl ParquetMetaDataReader { let schema_descr = Arc::new(SchemaDescriptor::new(schema)); if let (Some(algo), Some(file_decryption_properties)) = ( - t_file_metadata.encryption_algorithm.clone(), + t_file_metadata.encryption_algorithm, file_decryption_properties, ) { // File has a plaintext footer but encryption algorithm is set - file_decryptor = Some(get_file_decryptor( + let file_decryptor_value = get_file_decryptor( algo, - t_file_metadata - .footer_signing_key_metadata - .clone() - .as_deref(), + t_file_metadata.footer_signing_key_metadata.as_deref(), file_decryption_properties, - )?); - if file_decryption_properties.check_plaintext_footer_integrity() { - file_decryptor - .clone() - .unwrap() - .verify_plaintext_footer_signature(buf.to_vec().as_mut())?; + )?; + if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { + file_decryptor_value.verify_plaintext_footer_signature(buf.to_vec().as_mut())?; } + file_decryptor = Some(file_decryptor_value); } let mut row_groups = Vec::new(); diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index 5bd8923d014c..48e1278db986 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -43,7 +43,7 @@ use std::sync::Arc; fn test_non_uniform_encryption_plaintext_footer() { let test_data = arrow::util::test_util::parquet_test_data(); let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); - let file = File::open(path.clone()).unwrap(); + let file = File::open(path).unwrap(); // There is always a footer key even with a plaintext footer, // but this is used for signing the footer. @@ -52,23 +52,50 @@ fn test_non_uniform_encryption_plaintext_footer() { let column_2_key = "1234567890123451".as_bytes(); let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) - .disable_footer_signature_verification() .with_column_key("double_field", column_1_key.to_vec()) .with_column_key("float_field", column_2_key.to_vec()) .build() .unwrap(); verify_encryption_test_file_read(file, decryption_properties); +} +#[test] +fn test_plaintext_footer_signature_verification() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); let file = File::open(path.clone()).unwrap(); + let footer_key = "0000000000000000".as_bytes(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .disable_footer_signature_verification() .with_column_key("double_field", column_1_key.to_vec()) .with_column_key("float_field", column_2_key.to_vec()) .build() .unwrap(); verify_encryption_test_file_read(file, decryption_properties); + + let file = File::open(path.clone()).unwrap(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + let options = ArrowReaderOptions::default() + .with_file_decryption_properties(decryption_properties.clone()); + let result = ArrowReaderMetadata::load(&file, options.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .starts_with("Parquet error: Footer signature verification failed. Computed: [")); + // verify_encryption_test_file_read(file, decryption_properties); } #[test] From cab588ba240c6cf0a422fa8450a566353fd0441f Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 May 2025 00:30:26 +0200 Subject: [PATCH 07/23] Lint --- parquet/src/encryption/ciphers.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index 9a3250dc4a28..31fdab0d9af2 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -30,11 +30,7 @@ pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; - fn compute_plaintext_tag( - &self, - aad: &[u8], - plaintext: &mut [u8], - ) -> Result>; + fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &mut [u8]) -> Result>; } #[derive(Debug, Clone)] @@ -70,13 +66,8 @@ impl BlockDecryptor for RingGcmBlockDecryptor { Ok(result) } - fn compute_plaintext_tag( - &self, - aad: &[u8], - plaintext: &mut [u8], - ) -> Result> { - let nonce = &plaintext - [plaintext.len() - NONCE_LEN - TAG_LEN..plaintext.len() - TAG_LEN]; + fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &mut [u8]) -> Result> { + let nonce = &plaintext[plaintext.len() - NONCE_LEN - TAG_LEN..plaintext.len() - TAG_LEN]; let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; let plaintext_end = plaintext.len() - NONCE_LEN - TAG_LEN; let tag = self.key.seal_in_place_separate_tag( From 53a2ace0ff1a6b4ffdf25b4a4527694bb637a396 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 May 2025 11:55:28 +0200 Subject: [PATCH 08/23] Update parquet/tests/encryption/encryption.rs Co-authored-by: Adam Reeve --- parquet/tests/encryption/encryption.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index 48e1278db986..da263a5df6b3 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -95,7 +95,6 @@ fn test_plaintext_footer_signature_verification() { .unwrap_err() .to_string() .starts_with("Parquet error: Footer signature verification failed. Computed: [")); - // verify_encryption_test_file_read(file, decryption_properties); } #[test] From e87ccff685640ec1bdb8a02b638fc96574052347 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 May 2025 12:43:43 +0200 Subject: [PATCH 09/23] Review feedback --- parquet/src/encryption/ciphers.rs | 5 +++-- parquet/src/encryption/decrypt.rs | 7 ++----- parquet/src/file/metadata/reader.rs | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index 31fdab0d9af2..5764694675ff 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -30,7 +30,7 @@ pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; - fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &mut [u8]) -> Result>; + fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &[u8]) -> Result>; } #[derive(Debug, Clone)] @@ -66,7 +66,8 @@ impl BlockDecryptor for RingGcmBlockDecryptor { Ok(result) } - fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &mut [u8]) -> Result> { + fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &[u8]) -> Result> { + let mut plaintext = plaintext.to_vec(); let nonce = &plaintext[plaintext.len() - NONCE_LEN - TAG_LEN..plaintext.len() - TAG_LEN]; let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; let plaintext_end = plaintext.len() - NONCE_LEN - TAG_LEN; diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 0b592f1d186b..2cb6cccc002e 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -556,12 +556,9 @@ impl FileDecryptor { } /// Verify the signature of the footer - pub(crate) fn verify_plaintext_footer_signature( - &self, - plaintext_footer: &mut [u8], - ) -> Result<()> { + pub(crate) fn verify_plaintext_footer_signature(&self, plaintext_footer: &[u8]) -> Result<()> { // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] - let tag = plaintext_footer[plaintext_footer.len() - TAG_LEN..].to_vec(); + let tag = &plaintext_footer[plaintext_footer.len() - TAG_LEN..]; let aad = create_footer_aad(self.file_aad())?; let footer_decryptor = self.get_footer_decryptor()?; diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 62c686bdac69..6edf2e611d42 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -972,7 +972,7 @@ impl ParquetMetaDataReader { file_decryption_properties, )?; if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { - file_decryptor_value.verify_plaintext_footer_signature(buf.to_vec().as_mut())?; + file_decryptor_value.verify_plaintext_footer_signature(buf)?; } file_decryptor = Some(file_decryptor_value); } From 02fb79a84bde52e61bb0e2b9765317650b0d01a4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 23 Apr 2025 22:06:54 +0200 Subject: [PATCH 10/23] Initial commit --- parquet/src/file/writer.rs | 6 ------ parquet/tests/encryption/encryption.rs | 23 ++--------------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 18e357ebc2b9..0298d8a51df6 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -212,12 +212,6 @@ impl SerializedFileWriter { if let Some(file_encryption_properties) = &properties.file_encryption_properties { file_encryption_properties.validate_encrypted_column_names(schema_descriptor)?; - if !file_encryption_properties.encrypt_footer() { - return Err(general_err!( - "Writing encrypted files with plaintext footers is not supported yet" - )); - } - Ok(Some(Arc::new(FileEncryptor::new( file_encryption_properties.clone(), )?))) diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index da263a5df6b3..184bc99a5648 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -438,7 +438,7 @@ fn test_write_non_uniform_encryption() { #[test] fn test_write_uniform_encryption_plaintext_footer() { let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted"); + let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); let footer_key = b"0123456789012345".to_vec(); // 128bit/16 let column_1_key = b"1234567890123450".to_vec(); @@ -455,26 +455,7 @@ fn test_write_uniform_encryption_plaintext_footer() { .build() .unwrap(); - let file = File::open(path).unwrap(); - let options = ArrowReaderOptions::default() - .with_file_decryption_properties(decryption_properties.clone()); - let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); - - let props = WriterProperties::builder() - .with_file_encryption_properties(file_encryption_properties) - .build(); - let temp_file = tempfile::tempfile().unwrap(); - - let writer = ArrowWriter::try_new( - temp_file.try_clone().unwrap(), - metadata.schema().clone(), - Some(props), - ); - assert!(writer.is_err()); - assert_eq!( - writer.unwrap_err().to_string(), - "Parquet error: Writing encrypted files with plaintext footers is not supported yet" - ) + read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties); } #[test] From fae9492d8886e777b43f44cf005becd4975a58f3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 25 Apr 2025 21:02:39 +0200 Subject: [PATCH 11/23] Encrypt plaintext to extract nonce and footer --- parquet/src/file/metadata/writer.rs | 34 +++++++++++++++++++++++++++++ parquet/src/file/writer.rs | 1 + 2 files changed, 35 insertions(+) diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index c1fc41314415..006615fc251f 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#[cfg(feature = "encryption")] +use crate::encryption::ciphers::{NONCE_LEN, SIZE_LEN, TAG_LEN}; #[cfg(feature = "encryption")] use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, FileEncryptor}; #[cfg(feature = "encryption")] @@ -503,6 +505,38 @@ impl MetadataObjectWriter { let mut encryptor = file_encryptor.get_footer_encryptor()?; encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad) } + Some(file_encryptor) if !file_encryptor.properties().encrypt_footer() => { + // todo: should we also check for file_metadata.encryption_algorithm.is_some() ? + // Write unencrypted footer + let data_len: usize; + { + let mut buffer: Vec = vec![]; + let mut unencrypted_protocol = TCompactOutputProtocol::new(&mut buffer); + file_metadata.write_to_out_protocol(&mut unencrypted_protocol)?; + data_len = buffer.len(); + sink.write_all(&buffer)?; + } + + // Write nonce and tag + { + let mut encrypted_buffer: Vec = vec![]; + let aad = create_footer_aad(file_encryptor.file_aad())?; + let mut encryptor = file_encryptor.get_footer_encryptor()?; + + let mut protocol = TCompactOutputProtocol::new(&mut encrypted_buffer); + file_metadata.write_to_out_protocol(&mut protocol)?; + encryptor.encrypt(encrypted_buffer.as_ref(), &aad)?; + + // todo: check for overflow when calculating lengths + let nonce = + &encrypted_buffer[SIZE_LEN + data_len..SIZE_LEN + data_len + NONCE_LEN]; + sink.write_all(nonce)?; + let tag = &encrypted_buffer[SIZE_LEN + data_len + NONCE_LEN + ..SIZE_LEN + data_len + NONCE_LEN + TAG_LEN]; + sink.write_all(tag)?; + } + Ok(()) + } _ => Self::write_object(file_metadata, &mut sink), } } diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 0298d8a51df6..a70ab2d77f1d 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -349,6 +349,7 @@ impl SerializedFileWriter { ); #[cfg(feature = "encryption")] + // todo { encoder = encoder.with_file_encryptor(self.file_encryptor.clone()); } From 59c2971cd9d3c4b7d3ec9cba7c2591cfd20c2a60 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 26 Apr 2025 00:56:50 +0200 Subject: [PATCH 12/23] fix --- parquet/src/encryption/encrypt.rs | 26 ++++++++++++++++++++- parquet/src/file/metadata/writer.rs | 36 ++++------------------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 9a801434c0db..a292bcdc3c7c 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -17,7 +17,7 @@ //! Configuration and utilities for Parquet Modular Encryption -use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor}; +use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; @@ -374,6 +374,30 @@ pub(crate) fn encrypt_object( Ok(()) } +pub(crate) fn sign_and_write_object( + object: &T, + encryptor: &mut Box, + sink: &mut W, + module_aad: &[u8], +) -> Result<()> { + let mut buffer: Vec = vec![]; + { + let mut protocol = TCompactOutputProtocol::new(&mut buffer); + object.write_to_out_protocol(&mut protocol)?; + } + let plaintext_len = buffer.len(); + sink.write_all(&buffer)?; + + let encrypted_buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?; + + // Format is: [ciphertext size, nonce, ciphertext, authentication tag] + let nonce = encrypted_buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN].to_vec(); + let tag = encrypted_buffer[SIZE_LEN + NONCE_LEN + plaintext_len..SIZE_LEN + NONCE_LEN + plaintext_len + TAG_LEN].to_vec(); + sink.write_all(&nonce)?; + sink.write_all(&tag)?; + Ok(()) +} + /// Encrypt a Thrift serializable object to a byte vector pub(crate) fn encrypt_object_to_vec( object: &T, diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 006615fc251f..9bd5ea6d4c0a 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -16,9 +16,7 @@ // under the License. #[cfg(feature = "encryption")] -use crate::encryption::ciphers::{NONCE_LEN, SIZE_LEN, TAG_LEN}; -#[cfg(feature = "encryption")] -use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, FileEncryptor}; +use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, sign_and_write_object, FileEncryptor}; #[cfg(feature = "encryption")] use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; #[cfg(feature = "encryption")] @@ -507,35 +505,9 @@ impl MetadataObjectWriter { } Some(file_encryptor) if !file_encryptor.properties().encrypt_footer() => { // todo: should we also check for file_metadata.encryption_algorithm.is_some() ? - // Write unencrypted footer - let data_len: usize; - { - let mut buffer: Vec = vec![]; - let mut unencrypted_protocol = TCompactOutputProtocol::new(&mut buffer); - file_metadata.write_to_out_protocol(&mut unencrypted_protocol)?; - data_len = buffer.len(); - sink.write_all(&buffer)?; - } - - // Write nonce and tag - { - let mut encrypted_buffer: Vec = vec![]; - let aad = create_footer_aad(file_encryptor.file_aad())?; - let mut encryptor = file_encryptor.get_footer_encryptor()?; - - let mut protocol = TCompactOutputProtocol::new(&mut encrypted_buffer); - file_metadata.write_to_out_protocol(&mut protocol)?; - encryptor.encrypt(encrypted_buffer.as_ref(), &aad)?; - - // todo: check for overflow when calculating lengths - let nonce = - &encrypted_buffer[SIZE_LEN + data_len..SIZE_LEN + data_len + NONCE_LEN]; - sink.write_all(nonce)?; - let tag = &encrypted_buffer[SIZE_LEN + data_len + NONCE_LEN - ..SIZE_LEN + data_len + NONCE_LEN + TAG_LEN]; - sink.write_all(tag)?; - } - Ok(()) + let aad = create_footer_aad(file_encryptor.file_aad())?; + let mut encryptor = file_encryptor.get_footer_encryptor()?; + sign_and_write_object(file_metadata, &mut encryptor, &mut sink, &aad) } _ => Self::write_object(file_metadata, &mut sink), } From c3599572aa156be017f3c9586a941a770b451abc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 28 Apr 2025 22:24:29 +0200 Subject: [PATCH 13/23] Add encryption algorithm to file_metadata before writing --- parquet/src/encryption/encrypt.rs | 13 +++++++----- parquet/src/file/metadata/writer.rs | 31 +++++++++++++++++++++++------ parquet/src/file/writer.rs | 1 - 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index a292bcdc3c7c..89c242b16dbf 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -385,16 +385,19 @@ pub(crate) fn sign_and_write_object( let mut protocol = TCompactOutputProtocol::new(&mut buffer); object.write_to_out_protocol(&mut protocol)?; } - let plaintext_len = buffer.len(); sink.write_all(&buffer)?; + buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?; + + let ciphertext_length : u32 = buffer.len() + .try_into() + .map_err(|err| general_err!("Plaintext data too long. {:?}", err))?; - let encrypted_buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?; - // Format is: [ciphertext size, nonce, ciphertext, authentication tag] - let nonce = encrypted_buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN].to_vec(); - let tag = encrypted_buffer[SIZE_LEN + NONCE_LEN + plaintext_len..SIZE_LEN + NONCE_LEN + plaintext_len + TAG_LEN].to_vec(); + let nonce = buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN].to_vec(); + let tag = buffer[(ciphertext_length - TAG_LEN as u32) as usize..].to_vec(); sink.write_all(&nonce)?; sink.write_all(&tag)?; + Ok(()) } diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 9bd5ea6d4c0a..088d23edcc7e 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -16,9 +16,10 @@ // under the License. #[cfg(feature = "encryption")] -use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, sign_and_write_object, FileEncryptor}; -#[cfg(feature = "encryption")] -use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; +use crate::encryption::{ + encrypt::{encrypt_object, encrypt_object_to_vec, sign_and_write_object, FileEncryptor}, + modules::{create_footer_aad, create_module_aad, ModuleType}, +}; #[cfg(feature = "encryption")] use crate::errors::ParquetError; use crate::errors::Result; @@ -141,6 +142,25 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { .object_writer .apply_row_group_encryption(self.row_groups)?; + let mut encryption_algorithm = None; + if let Some(file_encryptor) = self.object_writer.file_encryptor.clone() { + let properties = file_encryptor.properties(); + if !properties.encrypt_footer() { + let supply_aad_prefix = properties + .aad_prefix() + .map(|_| !properties.store_aad_prefix()); + encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { + aad_prefix: if properties.store_aad_prefix() { + properties.aad_prefix().cloned() + } else { + None + }, + aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), + supply_aad_prefix, + })); + } + }; + let mut file_metadata = FileMetaData { num_rows, row_groups, @@ -149,7 +169,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { schema: types::to_thrift(self.schema.as_ref())?, created_by: self.created_by.clone(), column_orders, - encryption_algorithm: None, + encryption_algorithm, footer_signing_key_metadata: None, }; @@ -503,8 +523,7 @@ impl MetadataObjectWriter { let mut encryptor = file_encryptor.get_footer_encryptor()?; encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad) } - Some(file_encryptor) if !file_encryptor.properties().encrypt_footer() => { - // todo: should we also check for file_metadata.encryption_algorithm.is_some() ? + Some(file_encryptor) if file_metadata.encryption_algorithm.is_some() => { let aad = create_footer_aad(file_encryptor.file_aad())?; let mut encryptor = file_encryptor.get_footer_encryptor()?; sign_and_write_object(file_metadata, &mut encryptor, &mut sink, &aad) diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index a70ab2d77f1d..0298d8a51df6 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -349,7 +349,6 @@ impl SerializedFileWriter { ); #[cfg(feature = "encryption")] - // todo { encoder = encoder.with_file_encryptor(self.file_encryptor.clone()); } From 72f71ecc983046692b64ca143466a9601dfb95b5 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 28 Apr 2025 22:55:04 +0200 Subject: [PATCH 14/23] fix --- parquet/src/encryption/encrypt.rs | 21 +++++++++++++++++++++ parquet/src/file/metadata/writer.rs | 24 +++++++----------------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 89c242b16dbf..ee9a664b78a7 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -26,6 +26,7 @@ use ring::rand::{SecureRandom, SystemRandom}; use std::collections::{HashMap, HashSet}; use std::io::Write; use thrift::protocol::TCompactOutputProtocol; +use crate::format::{AesGcmV1, EncryptionAlgorithm}; #[derive(Debug, Clone, PartialEq)] struct EncryptionKey { @@ -360,6 +361,26 @@ impl FileEncryptor { Some(column_key) => Ok(Box::new(RingGcmBlockEncryptor::new(column_key.key())?)), } } + + /// Get encryption algorithm for the plaintext footer + pub(crate) fn get_footer_encryptor_for_plaintext(&self) -> Result> { + if !self.properties.encrypt_footer() { + let supply_aad_prefix = self.properties + .aad_prefix() + .map(|_| !self.properties.store_aad_prefix()); + let encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { + aad_prefix: if self.properties.store_aad_prefix() { + self.properties.aad_prefix().cloned() + } else { + None + }, + aad_file_unique: Some(self.aad_file_unique().clone()), + supply_aad_prefix, + })); + return Ok(encryption_algorithm); + } + Ok(None) + } } /// Write an encrypted Thrift serializable object diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 088d23edcc7e..e7b514ef9c96 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -142,23 +142,13 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { .object_writer .apply_row_group_encryption(self.row_groups)?; - let mut encryption_algorithm = None; - if let Some(file_encryptor) = self.object_writer.file_encryptor.clone() { - let properties = file_encryptor.properties(); - if !properties.encrypt_footer() { - let supply_aad_prefix = properties - .aad_prefix() - .map(|_| !properties.store_aad_prefix()); - encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { - aad_prefix: if properties.store_aad_prefix() { - properties.aad_prefix().cloned() - } else { - None - }, - aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), - supply_aad_prefix, - })); - } + #[cfg(not(feature = "encryption"))] + let encryption_algorithm = None; + + #[cfg(feature = "encryption")] + let encryption_algorithm = match &self.object_writer.file_encryptor { + Some(file_encryptor) => file_encryptor.get_footer_encryptor_for_plaintext()?, + _ => None, }; let mut file_metadata = FileMetaData { From 321347c1035cb3e86f16a739abf355dba0e2ee8b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 19:58:59 +0200 Subject: [PATCH 15/23] Apply suggestions from code review Co-authored-by: Adam Reeve --- parquet/src/encryption/encrypt.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index ee9a664b78a7..4a21fb29af3f 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -395,7 +395,7 @@ pub(crate) fn encrypt_object( Ok(()) } -pub(crate) fn sign_and_write_object( +pub(crate) fn write_signed_plaintext_object( object: &T, encryptor: &mut Box, sink: &mut W, @@ -413,11 +413,11 @@ pub(crate) fn sign_and_write_object( .try_into() .map_err(|err| general_err!("Plaintext data too long. {:?}", err))?; - // Format is: [ciphertext size, nonce, ciphertext, authentication tag] - let nonce = buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN].to_vec(); - let tag = buffer[(ciphertext_length - TAG_LEN as u32) as usize..].to_vec(); - sink.write_all(&nonce)?; - sink.write_all(&tag)?; + // Format of encrypted buffer is: [ciphertext size, nonce, ciphertext, authentication tag] + let nonce = &buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN]; + let tag = &buffer[(buffer.len() - TAG_LEN)..]; + sink.write_all(nonce)?; + sink.write_all(tag)?; Ok(()) } From 99964e0d37c747febbea9937912aec92ba5af78f Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 21:00:34 +0200 Subject: [PATCH 16/23] Fix --- parquet/src/encryption/encrypt.rs | 6 +----- parquet/src/file/metadata/writer.rs | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 4a21fb29af3f..040d421ae82e 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -409,13 +409,9 @@ pub(crate) fn write_signed_plaintext_object( sink.write_all(&buffer)?; buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?; - let ciphertext_length : u32 = buffer.len() - .try_into() - .map_err(|err| general_err!("Plaintext data too long. {:?}", err))?; - // Format of encrypted buffer is: [ciphertext size, nonce, ciphertext, authentication tag] let nonce = &buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN]; - let tag = &buffer[(buffer.len() - TAG_LEN)..]; + let tag = &buffer[buffer.len() - TAG_LEN..]; sink.write_all(nonce)?; sink.write_all(tag)?; diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index e7b514ef9c96..a6ed940488f2 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -17,7 +17,7 @@ #[cfg(feature = "encryption")] use crate::encryption::{ - encrypt::{encrypt_object, encrypt_object_to_vec, sign_and_write_object, FileEncryptor}, + encrypt::{encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor}, modules::{create_footer_aad, create_module_aad, ModuleType}, }; #[cfg(feature = "encryption")] @@ -516,7 +516,7 @@ impl MetadataObjectWriter { Some(file_encryptor) if file_metadata.encryption_algorithm.is_some() => { let aad = create_footer_aad(file_encryptor.file_aad())?; let mut encryptor = file_encryptor.get_footer_encryptor()?; - sign_and_write_object(file_metadata, &mut encryptor, &mut sink, &aad) + write_signed_plaintext_object(file_metadata, &mut encryptor, &mut sink, &aad) } _ => Self::write_object(file_metadata, &mut sink), } From 3768f86df2da4ec1c0e7ee3c394d60b5f3127fe6 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 21:16:36 +0200 Subject: [PATCH 17/23] lint --- parquet/src/encryption/encrypt.rs | 11 +++++++---- parquet/src/file/metadata/writer.rs | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 040d421ae82e..c35fc231be04 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -17,16 +17,18 @@ //! Configuration and utilities for Parquet Modular Encryption -use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN}; +use crate::encryption::ciphers::{ + BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN, +}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey}; +use crate::format::{AesGcmV1, EncryptionAlgorithm}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; use crate::thrift::TSerializable; use ring::rand::{SecureRandom, SystemRandom}; use std::collections::{HashMap, HashSet}; use std::io::Write; use thrift::protocol::TCompactOutputProtocol; -use crate::format::{AesGcmV1, EncryptionAlgorithm}; #[derive(Debug, Clone, PartialEq)] struct EncryptionKey { @@ -361,11 +363,12 @@ impl FileEncryptor { Some(column_key) => Ok(Box::new(RingGcmBlockEncryptor::new(column_key.key())?)), } } - + /// Get encryption algorithm for the plaintext footer pub(crate) fn get_footer_encryptor_for_plaintext(&self) -> Result> { if !self.properties.encrypt_footer() { - let supply_aad_prefix = self.properties + let supply_aad_prefix = self + .properties .aad_prefix() .map(|_| !self.properties.store_aad_prefix()); let encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index a6ed940488f2..50163571317c 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -17,7 +17,9 @@ #[cfg(feature = "encryption")] use crate::encryption::{ - encrypt::{encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor}, + encrypt::{ + encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor, + }, modules::{create_footer_aad, create_module_aad, ModuleType}, }; #[cfg(feature = "encryption")] From c138a540bf7bb01b5ad46da7d14179b2b977e242 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 1 May 2025 22:45:47 +0200 Subject: [PATCH 18/23] Move get_footer_encryption_algorithm into MetadataObjectWriter --- parquet/src/encryption/encrypt.rs | 22 ----------- parquet/src/file/metadata/writer.rs | 60 +++++++++++++++-------------- 2 files changed, 31 insertions(+), 51 deletions(-) diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index c35fc231be04..c8d3ffc0eef4 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -22,7 +22,6 @@ use crate::encryption::ciphers::{ }; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey}; -use crate::format::{AesGcmV1, EncryptionAlgorithm}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; use crate::thrift::TSerializable; use ring::rand::{SecureRandom, SystemRandom}; @@ -363,27 +362,6 @@ impl FileEncryptor { Some(column_key) => Ok(Box::new(RingGcmBlockEncryptor::new(column_key.key())?)), } } - - /// Get encryption algorithm for the plaintext footer - pub(crate) fn get_footer_encryptor_for_plaintext(&self) -> Result> { - if !self.properties.encrypt_footer() { - let supply_aad_prefix = self - .properties - .aad_prefix() - .map(|_| !self.properties.store_aad_prefix()); - let encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { - aad_prefix: if self.properties.store_aad_prefix() { - self.properties.aad_prefix().cloned() - } else { - None - }, - aad_file_unique: Some(self.aad_file_unique().clone()), - supply_aad_prefix, - })); - return Ok(encryption_algorithm); - } - Ok(None) - } } /// Write an encrypted Thrift serializable object diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 50163571317c..7222ddb4a14f 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -28,8 +28,9 @@ use crate::errors::Result; use crate::file::metadata::{KeyValue, ParquetMetaData}; use crate::file::page_index::index::Index; use crate::file::writer::{get_file_magic, TrackedWrite}; +use crate::format::EncryptionAlgorithm; #[cfg(feature = "encryption")] -use crate::format::{AesGcmV1, ColumnCryptoMetaData, EncryptionAlgorithm}; +use crate::format::{AesGcmV1, ColumnCryptoMetaData}; use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex, RowGroup}; use crate::schema::types; use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr}; @@ -144,15 +145,6 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { .object_writer .apply_row_group_encryption(self.row_groups)?; - #[cfg(not(feature = "encryption"))] - let encryption_algorithm = None; - - #[cfg(feature = "encryption")] - let encryption_algorithm = match &self.object_writer.file_encryptor { - Some(file_encryptor) => file_encryptor.get_footer_encryptor_for_plaintext()?, - _ => None, - }; - let mut file_metadata = FileMetaData { num_rows, row_groups, @@ -161,7 +153,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { schema: types::to_thrift(self.schema.as_ref())?, created_by: self.created_by.clone(), column_orders, - encryption_algorithm, + encryption_algorithm: self.object_writer.get_footer_encryption_algorithm(), footer_signing_key_metadata: None, }; @@ -486,6 +478,10 @@ impl MetadataObjectWriter { pub fn get_file_magic(&self) -> &[u8; 4] { get_file_magic() } + + fn get_footer_encryption_algorithm(&self) -> Option { + None + } } /// Implementations of [`MetadataObjectWriter`] methods that rely on encryption being enabled @@ -506,7 +502,7 @@ impl MetadataObjectWriter { match self.file_encryptor.as_ref() { Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => { // First write FileCryptoMetadata - let crypto_metadata = Self::file_crypto_metadata(file_encryptor)?; + let crypto_metadata = self.file_crypto_metadata()?; let mut protocol = TCompactOutputProtocol::new(&mut sink); crypto_metadata.write_to_out_protocol(&mut protocol)?; @@ -639,25 +635,31 @@ impl MetadataObjectWriter { } } - fn file_crypto_metadata( - file_encryptor: &FileEncryptor, - ) -> Result { - let properties = file_encryptor.properties(); - let supply_aad_prefix = properties - .aad_prefix() - .map(|_| !properties.store_aad_prefix()); - let encryption_algorithm = AesGcmV1 { - aad_prefix: if properties.store_aad_prefix() { - properties.aad_prefix().cloned() - } else { - None - }, - aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), - supply_aad_prefix, - }; + fn get_footer_encryption_algorithm(&self) -> Option { + let file_encryptor = self.file_encryptor.as_ref().unwrap(); + if !file_encryptor.properties().encrypt_footer() { + let supply_aad_prefix = file_encryptor + .properties() + .aad_prefix() + .map(|_| !file_encryptor.properties().store_aad_prefix()); + let encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { + aad_prefix: if file_encryptor.properties().store_aad_prefix() { + file_encryptor.properties().aad_prefix().cloned() + } else { + None + }, + aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), + supply_aad_prefix, + })); + return encryption_algorithm; + } + None + } + fn file_crypto_metadata(&self) -> Result { + let properties = self.file_encryptor.as_ref().unwrap().properties(); Ok(crate::format::FileCryptoMetaData { - encryption_algorithm: EncryptionAlgorithm::AESGCMV1(encryption_algorithm), + encryption_algorithm: self.get_footer_encryption_algorithm().unwrap(), key_metadata: properties.footer_key_metadata().cloned(), }) } From c775625b632e880e4a1e78bd4e3d0ac5c6d7ea2a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 5 May 2025 17:09:29 +0200 Subject: [PATCH 19/23] Fix --- parquet/src/file/metadata/writer.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 7222ddb4a14f..00ecd9518b94 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -636,13 +636,12 @@ impl MetadataObjectWriter { } fn get_footer_encryption_algorithm(&self) -> Option { - let file_encryptor = self.file_encryptor.as_ref().unwrap(); - if !file_encryptor.properties().encrypt_footer() { + if let Some(file_encryptor) = &self.file_encryptor { let supply_aad_prefix = file_encryptor .properties() .aad_prefix() .map(|_| !file_encryptor.properties().store_aad_prefix()); - let encryption_algorithm = Some(EncryptionAlgorithm::AESGCMV1(AesGcmV1 { + let encryption_algorithm = EncryptionAlgorithm::AESGCMV1(AesGcmV1 { aad_prefix: if file_encryptor.properties().store_aad_prefix() { file_encryptor.properties().aad_prefix().cloned() } else { @@ -650,8 +649,8 @@ impl MetadataObjectWriter { }, aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), supply_aad_prefix, - })); - return encryption_algorithm; + }); + return Some(encryption_algorithm); } None } From 1ab252b81303adf70ed56a33f3d396507e389a57 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 May 2025 15:15:19 +0200 Subject: [PATCH 20/23] Review feedback --- parquet/src/file/metadata/reader.rs | 3 ++- parquet/src/file/metadata/writer.rs | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 6edf2e611d42..089347b50fdf 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -17,13 +17,14 @@ use std::{io::Read, ops::Range, sync::Arc}; +use bytes::Bytes; + use crate::basic::ColumnOrder; #[cfg(feature = "encryption")] use crate::encryption::{ decrypt::{FileDecryptionProperties, FileDecryptor}, modules::create_footer_aad, }; -use bytes::Bytes; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData}; diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 00ecd9518b94..56e9f076612b 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -153,7 +153,9 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { schema: types::to_thrift(self.schema.as_ref())?, created_by: self.created_by.clone(), column_orders, - encryption_algorithm: self.object_writer.get_footer_encryption_algorithm(), + encryption_algorithm: MetadataObjectWriter::get_footer_encryption_algorithm( + &self.object_writer, + ), footer_signing_key_metadata: None, }; @@ -479,7 +481,9 @@ impl MetadataObjectWriter { get_file_magic() } - fn get_footer_encryption_algorithm(&self) -> Option { + fn get_footer_encryption_algorithm( + _object_writer: &MetadataObjectWriter, + ) -> Option { None } } @@ -502,7 +506,7 @@ impl MetadataObjectWriter { match self.file_encryptor.as_ref() { Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => { // First write FileCryptoMetadata - let crypto_metadata = self.file_crypto_metadata()?; + let crypto_metadata = Self::file_crypto_metadata(self)?; let mut protocol = TCompactOutputProtocol::new(&mut sink); crypto_metadata.write_to_out_protocol(&mut protocol)?; @@ -635,8 +639,11 @@ impl MetadataObjectWriter { } } - fn get_footer_encryption_algorithm(&self) -> Option { - if let Some(file_encryptor) = &self.file_encryptor { + fn get_footer_encryption_algorithm( + object_writer: &MetadataObjectWriter, + ) -> Option { + let file_encryptor = object_writer.file_encryptor.as_ref(); + if let Some(file_encryptor) = file_encryptor { let supply_aad_prefix = file_encryptor .properties() .aad_prefix() @@ -655,11 +662,13 @@ impl MetadataObjectWriter { None } - fn file_crypto_metadata(&self) -> Result { - let properties = self.file_encryptor.as_ref().unwrap().properties(); + fn file_crypto_metadata( + object_writer: &MetadataObjectWriter, + ) -> Result { + let file_encryptor = object_writer.file_encryptor.as_ref().unwrap(); Ok(crate::format::FileCryptoMetaData { - encryption_algorithm: self.get_footer_encryption_algorithm().unwrap(), - key_metadata: properties.footer_key_metadata().cloned(), + encryption_algorithm: Self::get_footer_encryption_algorithm(object_writer).unwrap(), + key_metadata: file_encryptor.properties().footer_key_metadata().cloned(), }) } From 7fd3c1329fb571d4832232f4012190fc78afe0ca Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Wed, 7 May 2025 09:08:44 +1200 Subject: [PATCH 21/23] Avoid unwraps in file_crypto_metada method --- parquet/src/file/metadata/reader.rs | 3 +- parquet/src/file/metadata/writer.rs | 56 ++++++++++++++--------------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 089347b50fdf..6edf2e611d42 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -17,14 +17,13 @@ use std::{io::Read, ops::Range, sync::Arc}; -use bytes::Bytes; - use crate::basic::ColumnOrder; #[cfg(feature = "encryption")] use crate::encryption::{ decrypt::{FileDecryptionProperties, FileDecryptor}, modules::create_footer_aad, }; +use bytes::Bytes; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData}; diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 56e9f076612b..f41781b62413 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -153,9 +153,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { schema: types::to_thrift(self.schema.as_ref())?, created_by: self.created_by.clone(), column_orders, - encryption_algorithm: MetadataObjectWriter::get_footer_encryption_algorithm( - &self.object_writer, - ), + encryption_algorithm: self.object_writer.get_footer_encryption_algorithm(), footer_signing_key_metadata: None, }; @@ -481,9 +479,7 @@ impl MetadataObjectWriter { get_file_magic() } - fn get_footer_encryption_algorithm( - _object_writer: &MetadataObjectWriter, - ) -> Option { + fn get_footer_encryption_algorithm(&self) -> Option { None } } @@ -506,7 +502,7 @@ impl MetadataObjectWriter { match self.file_encryptor.as_ref() { Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => { // First write FileCryptoMetadata - let crypto_metadata = Self::file_crypto_metadata(self)?; + let crypto_metadata = Self::file_crypto_metadata(file_encryptor)?; let mut protocol = TCompactOutputProtocol::new(&mut sink); crypto_metadata.write_to_out_protocol(&mut protocol)?; @@ -639,36 +635,36 @@ impl MetadataObjectWriter { } } - fn get_footer_encryption_algorithm( - object_writer: &MetadataObjectWriter, - ) -> Option { - let file_encryptor = object_writer.file_encryptor.as_ref(); - if let Some(file_encryptor) = file_encryptor { - let supply_aad_prefix = file_encryptor - .properties() - .aad_prefix() - .map(|_| !file_encryptor.properties().store_aad_prefix()); - let encryption_algorithm = EncryptionAlgorithm::AESGCMV1(AesGcmV1 { - aad_prefix: if file_encryptor.properties().store_aad_prefix() { - file_encryptor.properties().aad_prefix().cloned() - } else { - None - }, - aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), - supply_aad_prefix, - }); - return Some(encryption_algorithm); + fn get_footer_encryption_algorithm(&self) -> Option { + if let Some(file_encryptor) = &self.file_encryptor { + return Some(Self::encryption_algorithm_from_encryptor(file_encryptor)); } None } + fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) -> EncryptionAlgorithm { + let supply_aad_prefix = file_encryptor + .properties() + .aad_prefix() + .map(|_| !file_encryptor.properties().store_aad_prefix()); + EncryptionAlgorithm::AESGCMV1(AesGcmV1 { + aad_prefix: if file_encryptor.properties().store_aad_prefix() { + file_encryptor.properties().aad_prefix().cloned() + } else { + None + }, + aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), + supply_aad_prefix, + }) + } + fn file_crypto_metadata( - object_writer: &MetadataObjectWriter, + file_encryptor: &FileEncryptor, ) -> Result { - let file_encryptor = object_writer.file_encryptor.as_ref().unwrap(); + let properties = file_encryptor.properties(); Ok(crate::format::FileCryptoMetaData { - encryption_algorithm: Self::get_footer_encryption_algorithm(object_writer).unwrap(), - key_metadata: file_encryptor.properties().footer_key_metadata().cloned(), + encryption_algorithm: Self::encryption_algorithm_from_encryptor(file_encryptor), + key_metadata: properties.footer_key_metadata().cloned(), }) } From 713e0bc3f4b085f265441e9424a81d94983d0e61 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Wed, 7 May 2025 09:23:32 +1200 Subject: [PATCH 22/23] Minor tidy --- parquet/src/file/metadata/writer.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index f41781b62413..a01ad5d881a5 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -647,12 +647,13 @@ impl MetadataObjectWriter { .properties() .aad_prefix() .map(|_| !file_encryptor.properties().store_aad_prefix()); + let aad_prefix = if file_encryptor.properties().store_aad_prefix() { + file_encryptor.properties().aad_prefix().cloned() + } else { + None + }; EncryptionAlgorithm::AESGCMV1(AesGcmV1 { - aad_prefix: if file_encryptor.properties().store_aad_prefix() { - file_encryptor.properties().aad_prefix().cloned() - } else { - None - }, + aad_prefix, aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), supply_aad_prefix, }) From 5457472815276c717fa1a99e8e809039790bef4d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 7 May 2025 13:58:12 +0200 Subject: [PATCH 23/23] Test wrting and reading with a different footer key --- parquet/tests/encryption/encryption.rs | 57 ++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index 184bc99a5648..134e3383b3eb 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -433,18 +433,23 @@ fn test_write_non_uniform_encryption() { read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties); } -// todo: currently we raise if writing with plaintext footer, but we should support it -// for uniform and non-uniform encryption (see https://github.com/apache/arrow-rs/issues/7320) #[test] fn test_write_uniform_encryption_plaintext_footer() { let testdata = arrow::util::test_util::parquet_test_data(); let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); let footer_key = b"0123456789012345".to_vec(); // 128bit/16 + let wrong_footer_key = b"0000000000000000".to_vec(); // 128bit/16 let column_1_key = b"1234567890123450".to_vec(); let column_2_key = b"1234567890123451".to_vec(); let decryption_properties = FileDecryptionProperties::builder(footer_key.clone()) + .with_column_key("double_field", column_1_key.clone()) + .with_column_key("float_field", column_2_key.clone()) + .build() + .unwrap(); + + let wrong_decryption_properties = FileDecryptionProperties::builder(wrong_footer_key) .with_column_key("double_field", column_1_key) .with_column_key("float_field", column_2_key) .build() @@ -455,7 +460,53 @@ fn test_write_uniform_encryption_plaintext_footer() { .build() .unwrap(); - read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties); + // Try writing plaintext footer and then reading it with the correct footer key + read_and_roundtrip_to_encrypted_file( + &path, + decryption_properties.clone(), + file_encryption_properties.clone(), + ); + + // Try writing plaintext footer and then reading it with the wrong footer key + let temp_file = tempfile::tempfile().unwrap(); + + // read example data + let file = File::open(path).unwrap(); + let options = ArrowReaderOptions::default() + .with_file_decryption_properties(decryption_properties.clone()); + let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); + + let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let batch_reader = builder.build().unwrap(); + let batches = batch_reader + .collect::, _>>() + .unwrap(); + + // write example data + let props = WriterProperties::builder() + .with_file_encryption_properties(file_encryption_properties) + .build(); + + let mut writer = ArrowWriter::try_new( + temp_file.try_clone().unwrap(), + metadata.schema().clone(), + Some(props), + ) + .unwrap(); + for batch in batches { + writer.write(&batch).unwrap(); + } + writer.close().unwrap(); + + // Try reading plaintext footer and with the wrong footer key + let options = + ArrowReaderOptions::default().with_file_decryption_properties(wrong_decryption_properties); + let result = ArrowReaderMetadata::load(&temp_file, options.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .starts_with("Parquet error: Footer signature verification failed. Computed: [")); } #[test]