diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index 18a6f5776d6b..5764694675ff 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -23,12 +23,14 @@ use ring::rand::{SecureRandom, SystemRandom}; use std::fmt::Debug; const RIGHT_TWELVE: u128 = 0x0000_0000_ffff_ffff_ffff_ffff_ffff_ffff; -const NONCE_LEN: usize = 12; -const TAG_LEN: usize = 16; -const SIZE_LEN: usize = 4; +pub(crate) const NONCE_LEN: usize = 12; +pub(crate) const TAG_LEN: usize = 16; +pub(crate) const SIZE_LEN: usize = 4; pub(crate) trait BlockDecryptor: Debug + Send + Sync { fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result>; + + fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &[u8]) -> Result>; } #[derive(Debug, Clone)] @@ -63,6 +65,19 @@ impl BlockDecryptor for RingGcmBlockDecryptor { result.resize(result.len() - TAG_LEN, 0u8); Ok(result) } + + fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &[u8]) -> Result> { + let mut plaintext = plaintext.to_vec(); + let nonce = &plaintext[plaintext.len() - NONCE_LEN - TAG_LEN..plaintext.len() - TAG_LEN]; + let nonce = ring::aead::Nonce::try_assume_unique_for_key(nonce)?; + let plaintext_end = plaintext.len() - NONCE_LEN - TAG_LEN; + let tag = self.key.seal_in_place_separate_tag( + nonce, + Aad::from(aad), + &mut plaintext[..plaintext_end], + )?; + Ok(tag.as_ref().to_vec()) + } } pub(crate) trait BlockEncryptor: Debug + Send + Sync { diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index 6a51f1a6570e..2cb6cccc002e 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -17,8 +17,8 @@ //! Configuration and utilities for decryption of files using Parquet Modular Encryption -use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor}; -use crate::encryption::modules::{create_module_aad, ModuleType}; +use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor, TAG_LEN}; +use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::ColumnCryptoMetaData; use std::borrow::Cow; @@ -331,6 +331,7 @@ impl PartialEq for DecryptionKeys { pub struct FileDecryptionProperties { keys: DecryptionKeys, aad_prefix: Option>, + footer_signature_verification: bool, } impl FileDecryptionProperties { @@ -351,6 +352,11 @@ impl FileDecryptionProperties { self.aad_prefix.as_ref() } + /// Returns true if footer signature verification is enabled for files with plaintext footers. + pub fn check_plaintext_footer_integrity(&self) -> bool { + self.footer_signature_verification + } + /// Get the encryption key for decrypting a file's footer, /// and also column data if uniform encryption is used. pub fn footer_key(&self, key_metadata: Option<&[u8]>) -> Result>> { @@ -415,6 +421,7 @@ pub struct DecryptionPropertiesBuilder { key_retriever: Option>, column_keys: HashMap>, aad_prefix: Option>, + footer_signature_verification: bool, } impl DecryptionPropertiesBuilder { @@ -426,6 +433,7 @@ impl DecryptionPropertiesBuilder { key_retriever: None, column_keys: HashMap::default(), aad_prefix: None, + footer_signature_verification: true, } } @@ -439,6 +447,7 @@ impl DecryptionPropertiesBuilder { key_retriever: Some(key_retriever), column_keys: HashMap::default(), aad_prefix: None, + footer_signature_verification: true, } } @@ -464,6 +473,7 @@ impl DecryptionPropertiesBuilder { Ok(FileDecryptionProperties { keys, aad_prefix: self.aad_prefix, + footer_signature_verification: self.footer_signature_verification, }) } @@ -496,6 +506,13 @@ impl DecryptionPropertiesBuilder { } Ok(self) } + + /// Disable verification of footer tags for files that use plaintext footers. + /// Signature verification is enabled by default. + pub fn disable_footer_signature_verification(mut self) -> Self { + self.footer_signature_verification = false; + self + } } #[derive(Clone, Debug)] @@ -538,6 +555,25 @@ impl FileDecryptor { Ok(self.footer_decryptor.clone()) } + /// Verify the signature of the footer + pub(crate) fn verify_plaintext_footer_signature(&self, plaintext_footer: &[u8]) -> Result<()> { + // Plaintext footer format is: [plaintext metadata, nonce, authentication tag] + let tag = &plaintext_footer[plaintext_footer.len() - TAG_LEN..]; + let aad = create_footer_aad(self.file_aad())?; + let footer_decryptor = self.get_footer_decryptor()?; + + let computed_tag = footer_decryptor.compute_plaintext_tag(&aad, plaintext_footer)?; + + if computed_tag != tag { + return Err(general_err!( + "Footer signature verification failed. Computed: {:?}, Expected: {:?}", + computed_tag, + tag + )); + } + Ok(()) + } + pub(crate) fn get_column_data_decryptor( &self, column_name: &str, diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 9a801434c0db..c8d3ffc0eef4 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -17,7 +17,9 @@ //! Configuration and utilities for Parquet Modular Encryption -use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor}; +use crate::encryption::ciphers::{ + BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN, +}; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; @@ -374,6 +376,29 @@ pub(crate) fn encrypt_object( Ok(()) } +pub(crate) fn write_signed_plaintext_object( + object: &T, + encryptor: &mut Box, + sink: &mut W, + module_aad: &[u8], +) -> Result<()> { + let mut buffer: Vec = vec![]; + { + let mut protocol = TCompactOutputProtocol::new(&mut buffer); + object.write_to_out_protocol(&mut protocol)?; + } + sink.write_all(&buffer)?; + buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?; + + // Format of encrypted buffer is: [ciphertext size, nonce, ciphertext, authentication tag] + let nonce = &buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN]; + let tag = &buffer[buffer.len() - TAG_LEN..]; + sink.write_all(nonce)?; + sink.write_all(tag)?; + + Ok(()) +} + /// Encrypt a Thrift serializable object to a byte vector pub(crate) fn encrypt_object_to_vec( object: &T, diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index e496cae4dead..d5877aa4566a 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -1999,7 +1999,7 @@ mod tests { #[cfg(not(feature = "encryption"))] let base_expected_size = 2312; #[cfg(feature = "encryption")] - let base_expected_size = 2640; + let base_expected_size = 2648; assert_eq!(parquet_meta.memory_size(), base_expected_size); @@ -2029,7 +2029,7 @@ mod tests { #[cfg(not(feature = "encryption"))] let bigger_expected_size = 2816; #[cfg(feature = "encryption")] - let bigger_expected_size = 3144; + let bigger_expected_size = 3152; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size); diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 00b6b2d4f545..6edf2e611d42 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -17,14 +17,13 @@ use std::{io::Read, ops::Range, sync::Arc}; -use bytes::Bytes; - use crate::basic::ColumnOrder; #[cfg(feature = "encryption")] use crate::encryption::{ decrypt::{FileDecryptionProperties, FileDecryptor}, modules::create_footer_aad, }; +use bytes::Bytes; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData}; @@ -967,11 +966,15 @@ impl ParquetMetaDataReader { file_decryption_properties, ) { // File has a plaintext footer but encryption algorithm is set - file_decryptor = Some(get_file_decryptor( + let file_decryptor_value = get_file_decryptor( algo, t_file_metadata.footer_signing_key_metadata.as_deref(), file_decryption_properties, - )?); + )?; + if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { + file_decryptor_value.verify_plaintext_footer_signature(buf)?; + } + file_decryptor = Some(file_decryptor_value); } let mut row_groups = Vec::new(); diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index c1fc41314415..a01ad5d881a5 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -16,17 +16,21 @@ // under the License. #[cfg(feature = "encryption")] -use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, FileEncryptor}; -#[cfg(feature = "encryption")] -use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType}; +use crate::encryption::{ + encrypt::{ + encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor, + }, + modules::{create_footer_aad, create_module_aad, ModuleType}, +}; #[cfg(feature = "encryption")] use crate::errors::ParquetError; use crate::errors::Result; use crate::file::metadata::{KeyValue, ParquetMetaData}; use crate::file::page_index::index::Index; use crate::file::writer::{get_file_magic, TrackedWrite}; +use crate::format::EncryptionAlgorithm; #[cfg(feature = "encryption")] -use crate::format::{AesGcmV1, ColumnCryptoMetaData, EncryptionAlgorithm}; +use crate::format::{AesGcmV1, ColumnCryptoMetaData}; use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex, RowGroup}; use crate::schema::types; use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr}; @@ -149,7 +153,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { schema: types::to_thrift(self.schema.as_ref())?, created_by: self.created_by.clone(), column_orders, - encryption_algorithm: None, + encryption_algorithm: self.object_writer.get_footer_encryption_algorithm(), footer_signing_key_metadata: None, }; @@ -474,6 +478,10 @@ impl MetadataObjectWriter { pub fn get_file_magic(&self) -> &[u8; 4] { get_file_magic() } + + fn get_footer_encryption_algorithm(&self) -> Option { + None + } } /// Implementations of [`MetadataObjectWriter`] methods that rely on encryption being enabled @@ -503,6 +511,11 @@ impl MetadataObjectWriter { let mut encryptor = file_encryptor.get_footer_encryptor()?; encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad) } + Some(file_encryptor) if file_metadata.encryption_algorithm.is_some() => { + let aad = create_footer_aad(file_encryptor.file_aad())?; + let mut encryptor = file_encryptor.get_footer_encryptor()?; + write_signed_plaintext_object(file_metadata, &mut encryptor, &mut sink, &aad) + } _ => Self::write_object(file_metadata, &mut sink), } } @@ -622,25 +635,36 @@ impl MetadataObjectWriter { } } - fn file_crypto_metadata( - file_encryptor: &FileEncryptor, - ) -> Result { - let properties = file_encryptor.properties(); - let supply_aad_prefix = properties + fn get_footer_encryption_algorithm(&self) -> Option { + if let Some(file_encryptor) = &self.file_encryptor { + return Some(Self::encryption_algorithm_from_encryptor(file_encryptor)); + } + None + } + + fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) -> EncryptionAlgorithm { + let supply_aad_prefix = file_encryptor + .properties() .aad_prefix() - .map(|_| !properties.store_aad_prefix()); - let encryption_algorithm = AesGcmV1 { - aad_prefix: if properties.store_aad_prefix() { - properties.aad_prefix().cloned() - } else { - None - }, + .map(|_| !file_encryptor.properties().store_aad_prefix()); + let aad_prefix = if file_encryptor.properties().store_aad_prefix() { + file_encryptor.properties().aad_prefix().cloned() + } else { + None + }; + EncryptionAlgorithm::AESGCMV1(AesGcmV1 { + aad_prefix, aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), supply_aad_prefix, - }; + }) + } + fn file_crypto_metadata( + file_encryptor: &FileEncryptor, + ) -> Result { + let properties = file_encryptor.properties(); Ok(crate::format::FileCryptoMetaData { - encryption_algorithm: EncryptionAlgorithm::AESGCMV1(encryption_algorithm), + encryption_algorithm: Self::encryption_algorithm_from_encryptor(file_encryptor), key_metadata: properties.footer_key_metadata().cloned(), }) } diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 18e357ebc2b9..0298d8a51df6 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -212,12 +212,6 @@ impl SerializedFileWriter { if let Some(file_encryption_properties) = &properties.file_encryption_properties { file_encryption_properties.validate_encrypted_column_names(schema_descriptor)?; - if !file_encryption_properties.encrypt_footer() { - return Err(general_err!( - "Writing encrypted files with plaintext footers is not supported yet" - )); - } - Ok(Some(Arc::new(FileEncryptor::new( file_encryption_properties.clone(), )?))) diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index 664850e507da..134e3383b3eb 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -60,6 +60,43 @@ fn test_non_uniform_encryption_plaintext_footer() { verify_encryption_test_file_read(file, decryption_properties); } +#[test] +fn test_plaintext_footer_signature_verification() { + let test_data = arrow::util::test_util::parquet_test_data(); + let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted"); + let file = File::open(path.clone()).unwrap(); + + let footer_key = "0000000000000000".as_bytes(); // 128bit/16 + let column_1_key = "1234567890123450".as_bytes(); + let column_2_key = "1234567890123451".as_bytes(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .disable_footer_signature_verification() + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + verify_encryption_test_file_read(file, decryption_properties); + + let file = File::open(path.clone()).unwrap(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) + .with_column_key("double_field", column_1_key.to_vec()) + .with_column_key("float_field", column_2_key.to_vec()) + .build() + .unwrap(); + + let options = ArrowReaderOptions::default() + .with_file_decryption_properties(decryption_properties.clone()); + let result = ArrowReaderMetadata::load(&file, options.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .starts_with("Parquet error: Footer signature verification failed. Computed: [")); +} + #[test] fn test_non_uniform_encryption_disabled_aad_storage() { let test_data = arrow::util::test_util::parquet_test_data(); @@ -396,18 +433,23 @@ fn test_write_non_uniform_encryption() { read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties); } -// todo: currently we raise if writing with plaintext footer, but we should support it -// for uniform and non-uniform encryption (see https://github.com/apache/arrow-rs/issues/7320) #[test] fn test_write_uniform_encryption_plaintext_footer() { let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted"); + let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted"); let footer_key = b"0123456789012345".to_vec(); // 128bit/16 + let wrong_footer_key = b"0000000000000000".to_vec(); // 128bit/16 let column_1_key = b"1234567890123450".to_vec(); let column_2_key = b"1234567890123451".to_vec(); let decryption_properties = FileDecryptionProperties::builder(footer_key.clone()) + .with_column_key("double_field", column_1_key.clone()) + .with_column_key("float_field", column_2_key.clone()) + .build() + .unwrap(); + + let wrong_decryption_properties = FileDecryptionProperties::builder(wrong_footer_key) .with_column_key("double_field", column_1_key) .with_column_key("float_field", column_2_key) .build() @@ -418,26 +460,53 @@ fn test_write_uniform_encryption_plaintext_footer() { .build() .unwrap(); + // Try writing plaintext footer and then reading it with the correct footer key + read_and_roundtrip_to_encrypted_file( + &path, + decryption_properties.clone(), + file_encryption_properties.clone(), + ); + + // Try writing plaintext footer and then reading it with the wrong footer key + let temp_file = tempfile::tempfile().unwrap(); + + // read example data let file = File::open(path).unwrap(); let options = ArrowReaderOptions::default() .with_file_decryption_properties(decryption_properties.clone()); let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); + let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let batch_reader = builder.build().unwrap(); + let batches = batch_reader + .collect::, _>>() + .unwrap(); + + // write example data let props = WriterProperties::builder() .with_file_encryption_properties(file_encryption_properties) .build(); - let temp_file = tempfile::tempfile().unwrap(); - let writer = ArrowWriter::try_new( + let mut writer = ArrowWriter::try_new( temp_file.try_clone().unwrap(), metadata.schema().clone(), Some(props), - ); - assert!(writer.is_err()); - assert_eq!( - writer.unwrap_err().to_string(), - "Parquet error: Writing encrypted files with plaintext footers is not supported yet" ) + .unwrap(); + for batch in batches { + writer.write(&batch).unwrap(); + } + writer.close().unwrap(); + + // Try reading plaintext footer and with the wrong footer key + let options = + ArrowReaderOptions::default().with_file_decryption_properties(wrong_decryption_properties); + let result = ArrowReaderMetadata::load(&temp_file, options.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .starts_with("Parquet error: Footer signature verification failed. Computed: [")); } #[test]