-
Notifications
You must be signed in to change notification settings - Fork 1k
[TieredStorage] Enable file hash in TieredStorage #170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,6 +1,7 @@ | ||||||||||||||||||
| use { | ||||||||||||||||||
| super::{error::TieredStorageError, TieredStorageResult}, | ||||||||||||||||||
| super::{error::TieredStorageError, footer::FOOTER_TAIL_SIZE, TieredStorageResult}, | ||||||||||||||||||
| bytemuck::{AnyBitPattern, NoUninit, Pod, Zeroable}, | ||||||||||||||||||
| solana_sdk::hash::Hash, | ||||||||||||||||||
| std::{ | ||||||||||||||||||
| fs::{File, OpenOptions}, | ||||||||||||||||||
| io::{BufWriter, Read, Result as IoResult, Seek, SeekFrom, Write}, | ||||||||||||||||||
|
|
@@ -38,17 +39,37 @@ impl TieredReadableFile { | |||||||||||||||||
| ); | ||||||||||||||||||
|
|
||||||||||||||||||
| file.check_magic_number()?; | ||||||||||||||||||
| file.check_file_hash()?; | ||||||||||||||||||
|
|
||||||||||||||||||
| Ok(file) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| pub fn new_writable(file_path: impl AsRef<Path>) -> IoResult<Self> { | ||||||||||||||||||
| Ok(Self( | ||||||||||||||||||
| OpenOptions::new() | ||||||||||||||||||
| .create_new(true) | ||||||||||||||||||
| .write(true) | ||||||||||||||||||
| .open(file_path)?, | ||||||||||||||||||
| )) | ||||||||||||||||||
| fn check_file_hash(&self) -> TieredStorageResult<()> { | ||||||||||||||||||
| self.seek(0)?; | ||||||||||||||||||
|
|
||||||||||||||||||
| let len = self.0.metadata()?.len() as usize; | ||||||||||||||||||
| let hashed_len = len - std::mem::size_of::<Hash>() - FOOTER_TAIL_SIZE; | ||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs to handle wrapping/overflow. Please use safe math here. |
||||||||||||||||||
| let mut hasher = blake3::Hasher::new(); | ||||||||||||||||||
|
|
||||||||||||||||||
| const BLOCK_SIZE: usize = 4096; | ||||||||||||||||||
| let mut buffer = [0u8; BLOCK_SIZE]; | ||||||||||||||||||
| let mut offset = 0; | ||||||||||||||||||
| while offset < hashed_len { | ||||||||||||||||||
| let block_size = std::cmp::min(BLOCK_SIZE, hashed_len - offset); | ||||||||||||||||||
| self.read_bytes(&mut buffer[0..block_size])?; | ||||||||||||||||||
| hasher.update(&buffer[0..block_size]); | ||||||||||||||||||
|
|
||||||||||||||||||
| offset += block_size; | ||||||||||||||||||
| } | ||||||||||||||||||
| let hash = Hash::new_from_array(hasher.finalize().into()); | ||||||||||||||||||
|
|
||||||||||||||||||
| let mut raw_hash_from_file = [0u8; 32]; | ||||||||||||||||||
| self.read_bytes(&mut raw_hash_from_file)?; | ||||||||||||||||||
| let hash_from_file = Hash::new_from_array(raw_hash_from_file); | ||||||||||||||||||
|
|
||||||||||||||||||
| (hash == hash_from_file) | ||||||||||||||||||
| .then(|| Ok(())) | ||||||||||||||||||
| .unwrap_or_else(|| Err(TieredStorageError::FileHashMismatch(hash, hash_from_file))) | ||||||||||||||||||
|
Comment on lines
+70
to
+72
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably simpler to do:
Suggested change
|
||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| fn check_magic_number(&self) -> TieredStorageResult<()> { | ||||||||||||||||||
|
|
@@ -105,16 +126,22 @@ impl TieredReadableFile { | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| #[derive(Debug)] | ||||||||||||||||||
| pub struct TieredWritableFile(pub BufWriter<File>); | ||||||||||||||||||
| pub struct TieredWritableFile { | ||||||||||||||||||
| file: BufWriter<File>, | ||||||||||||||||||
| hasher: blake3::Hasher, | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| impl TieredWritableFile { | ||||||||||||||||||
| pub fn new(file_path: impl AsRef<Path>) -> IoResult<Self> { | ||||||||||||||||||
| Ok(Self(BufWriter::new( | ||||||||||||||||||
| OpenOptions::new() | ||||||||||||||||||
| .create_new(true) | ||||||||||||||||||
| .write(true) | ||||||||||||||||||
| .open(file_path)?, | ||||||||||||||||||
| ))) | ||||||||||||||||||
| Ok(Self { | ||||||||||||||||||
| file: BufWriter::new( | ||||||||||||||||||
| OpenOptions::new() | ||||||||||||||||||
| .create_new(true) | ||||||||||||||||||
| .write(true) | ||||||||||||||||||
| .open(file_path)?, | ||||||||||||||||||
| ), | ||||||||||||||||||
| hasher: blake3::Hasher::new(), | ||||||||||||||||||
| }) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /// Writes `value` to the file. | ||||||||||||||||||
|
|
@@ -142,33 +169,71 @@ impl TieredWritableFile { | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| pub fn seek(&mut self, offset: u64) -> IoResult<u64> { | ||||||||||||||||||
| self.0.seek(SeekFrom::Start(offset)) | ||||||||||||||||||
| self.file.seek(SeekFrom::Start(offset)) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| pub fn seek_from_end(&mut self, offset: i64) -> IoResult<u64> { | ||||||||||||||||||
| self.0.seek(SeekFrom::End(offset)) | ||||||||||||||||||
| self.file.seek(SeekFrom::End(offset)) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| pub fn write_bytes(&mut self, bytes: &[u8]) -> IoResult<usize> { | ||||||||||||||||||
| self.0.write_all(bytes)?; | ||||||||||||||||||
| self.file.write_all(bytes)?; | ||||||||||||||||||
| self.hasher.update(bytes); | ||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it'll be good to benchmark how much time this adds. A normal mnb slot probably has ~1000 accounts written, and each account is about 200 bytes of account data. I think a benchmark comparing with and without the hash would be good. If this hash is relatively expensive, we may need to consider an alternative impl. |
||||||||||||||||||
|
|
||||||||||||||||||
| Ok(bytes.len()) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| pub fn hash(&self) -> Hash { | ||||||||||||||||||
| Hash::new_from_array(self.hasher.finalize().into()) | ||||||||||||||||||
| } | ||||||||||||||||||
|
Comment on lines
+186
to
+188
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if this function is called multiple times? Iow, is it safe to call |
||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| #[cfg(test)] | ||||||||||||||||||
| mod tests { | ||||||||||||||||||
| use { | ||||||||||||||||||
| crate::tiered_storage::{ | ||||||||||||||||||
| error::TieredStorageError, | ||||||||||||||||||
| file::{TieredReadableFile, TieredWritableFile, FILE_MAGIC_NUMBER}, | ||||||||||||||||||
| file::{ | ||||||||||||||||||
| TieredReadableFile, TieredStorageMagicNumber, TieredWritableFile, FILE_MAGIC_NUMBER, | ||||||||||||||||||
| }, | ||||||||||||||||||
| footer::TieredStorageFooter, | ||||||||||||||||||
| }, | ||||||||||||||||||
| std::path::Path, | ||||||||||||||||||
| tempfile::TempDir, | ||||||||||||||||||
| }; | ||||||||||||||||||
|
|
||||||||||||||||||
| // Only write the footer and update the hash without writing the magic number | ||||||||||||||||||
| fn write_footer_only(footer: &mut TieredStorageFooter, file: &mut TieredWritableFile) { | ||||||||||||||||||
| // SAFETY: The footer does not contain any uninitialized bytes. | ||||||||||||||||||
| unsafe { | ||||||||||||||||||
| file.write_type(&footer.account_meta_format).unwrap(); | ||||||||||||||||||
| file.write_type(&footer.owners_block_format).unwrap(); | ||||||||||||||||||
| file.write_type(&footer.index_block_format).unwrap(); | ||||||||||||||||||
| file.write_type(&footer.account_block_format).unwrap(); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| file.write_pod(&footer.account_entry_count).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.account_meta_entry_size).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.account_block_size).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.owner_count).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.owner_entry_size).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.index_block_offset).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.owners_block_offset).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.min_account_address).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.max_account_address).unwrap(); | ||||||||||||||||||
|
|
||||||||||||||||||
| // everything before the FooterTail will be hashed | ||||||||||||||||||
| footer.hash = file.hash(); | ||||||||||||||||||
| file.write_pod(&footer.hash).unwrap(); | ||||||||||||||||||
|
|
||||||||||||||||||
| file.write_pod(&footer.format_version).unwrap(); | ||||||||||||||||||
| file.write_pod(&footer.footer_size).unwrap(); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| fn generate_test_file_with_number(path: impl AsRef<Path>, number: u64) { | ||||||||||||||||||
| let mut file = TieredWritableFile::new(path).unwrap(); | ||||||||||||||||||
| let mut footer = TieredStorageFooter::default(); | ||||||||||||||||||
| write_footer_only(&mut footer, &mut file); | ||||||||||||||||||
| file.write_pod(&number).unwrap(); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
@@ -190,4 +255,56 @@ mod tests { | |||||||||||||||||
| Err(TieredStorageError::MagicNumberMismatch(_, _)) | ||||||||||||||||||
| )); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| #[test] | ||||||||||||||||||
| fn test_file_hash() { | ||||||||||||||||||
| let temp_dir = TempDir::new().unwrap(); | ||||||||||||||||||
| let path = temp_dir.path().join("test_file_hash_mismatch"); | ||||||||||||||||||
| let mut expected_footer = TieredStorageFooter { | ||||||||||||||||||
| account_entry_count: 300, | ||||||||||||||||||
| account_meta_entry_size: 24, | ||||||||||||||||||
| account_block_size: 4096, | ||||||||||||||||||
| owner_count: 250, | ||||||||||||||||||
| owner_entry_size: 32, | ||||||||||||||||||
| index_block_offset: 1069600, | ||||||||||||||||||
| owners_block_offset: 1081200, | ||||||||||||||||||
| ..TieredStorageFooter::default() | ||||||||||||||||||
| }; | ||||||||||||||||||
|
|
||||||||||||||||||
| // Manually persist the footer without updating the hash | ||||||||||||||||||
| { | ||||||||||||||||||
| let mut file = TieredWritableFile::new(&path).unwrap(); | ||||||||||||||||||
| // SAFETY: the footer does not contain any uninitialized bytes | ||||||||||||||||||
| unsafe { | ||||||||||||||||||
| file.write_type(&expected_footer).unwrap(); | ||||||||||||||||||
| } | ||||||||||||||||||
| file.write_pod(&TieredStorageMagicNumber::default()) | ||||||||||||||||||
| .unwrap(); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Reopen the same storage and expect FileHashMismatch | ||||||||||||||||||
| { | ||||||||||||||||||
| let result = TieredReadableFile::new(&path); | ||||||||||||||||||
| assert!(matches!( | ||||||||||||||||||
| result, | ||||||||||||||||||
| Err(TieredStorageError::FileHashMismatch(_, _)) | ||||||||||||||||||
| )); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Rewrite the same footer into a different file, but this time using | ||||||||||||||||||
| // the standard way to persist the footer that will also update the | ||||||||||||||||||
| // file hash. | ||||||||||||||||||
| let path = temp_dir.path().join("test_file_hash"); | ||||||||||||||||||
| { | ||||||||||||||||||
| let mut file = TieredWritableFile::new(&path).unwrap(); | ||||||||||||||||||
| expected_footer.write_footer_block(&mut file).unwrap() | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Reopen the same storage and expect the footer matches | ||||||||||||||||||
| { | ||||||||||||||||||
| let file = TieredReadableFile::new(&path).unwrap(); | ||||||||||||||||||
| let footer = TieredStorageFooter::new_from_footer_block(&file).unwrap(); | ||||||||||||||||||
| assert_eq!(expected_footer, footer); | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ use { | |
| thiserror::Error, | ||
| }; | ||
|
|
||
| pub const FOOTER_FORMAT_VERSION: u64 = 1; | ||
| pub const FOOTER_FORMAT_VERSION: u64 = 2; | ||
|
|
||
| /// The size of the footer struct + the magic number at the end. | ||
| pub const FOOTER_SIZE: usize = | ||
|
|
@@ -159,9 +159,9 @@ impl Default for TieredStorageFooter { | |
| owner_entry_size: 0, | ||
| index_block_offset: 0, | ||
| owners_block_offset: 0, | ||
| hash: Hash::new_unique(), | ||
| min_account_address: Pubkey::default(), | ||
| max_account_address: Pubkey::default(), | ||
| hash: Hash::default(), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Up on line 111, where the struct is defined, I think we should use our own newtype for the hash. i.e.: struct TieredStorageFooterHash(Hash)(or some other name) And likely derive all the same things that |
||
| format_version: FOOTER_FORMAT_VERSION, | ||
| footer_size: FOOTER_SIZE as u64, | ||
| } | ||
|
|
@@ -174,9 +174,33 @@ impl TieredStorageFooter { | |
| Self::new_from_footer_block(&file) | ||
| } | ||
|
|
||
| pub fn write_footer_block(&self, file: &mut TieredWritableFile) -> TieredStorageResult<()> { | ||
| /// Presist the footer to the specified TieredStorageWritableFile. | ||
| /// The `hash` field of the footer will also be updated. | ||
| pub fn write_footer_block(&mut self, file: &mut TieredWritableFile) -> TieredStorageResult<()> { | ||
| // SAFETY: The footer does not contain any uninitialized bytes. | ||
| unsafe { file.write_type(self)? }; | ||
| unsafe { | ||
| file.write_type(&self.account_meta_format)?; | ||
| file.write_type(&self.owners_block_format)?; | ||
| file.write_type(&self.index_block_format)?; | ||
| file.write_type(&self.account_block_format)?; | ||
| } | ||
|
|
||
| file.write_pod(&self.account_entry_count)?; | ||
| file.write_pod(&self.account_meta_entry_size)?; | ||
| file.write_pod(&self.account_block_size)?; | ||
| file.write_pod(&self.owner_count)?; | ||
| file.write_pod(&self.owner_entry_size)?; | ||
| file.write_pod(&self.index_block_offset)?; | ||
| file.write_pod(&self.owners_block_offset)?; | ||
| file.write_pod(&self.min_account_address)?; | ||
| file.write_pod(&self.max_account_address)?; | ||
|
|
||
| // everything before the FooterTail will be hashed | ||
| self.hash = file.hash(); | ||
| file.write_pod(&self.hash)?; | ||
|
|
||
| file.write_pod(&self.format_version)?; | ||
| file.write_pod(&self.footer_size)?; | ||
|
Comment on lines
+199
to
+203
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @brooksprumo: I think we can consider reordering the items in the footer tail so that the format version and footer-size are also hashed. And, together with your previous comment #195 (comment). If we also exclude hash or even footer tail from the footer, then we can directly do What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems reasonable to me. So like a two-part footer. The top is hashed, and the bottom is not hashed. The bottom is where the hash itself lives, along with the magic number/etc. Is that right?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right! |
||
| file.write_pod(&TieredStorageMagicNumber::default())?; | ||
|
|
||
| Ok(()) | ||
|
|
@@ -313,13 +337,12 @@ mod tests { | |
| append_vec::test_utils::get_append_vec_path, tiered_storage::file::TieredWritableFile, | ||
| }, | ||
| memoffset::offset_of, | ||
| solana_sdk::hash::Hash, | ||
| }; | ||
|
|
||
| #[test] | ||
| fn test_footer() { | ||
| let path = get_append_vec_path("test_file_footer"); | ||
| let expected_footer = TieredStorageFooter { | ||
| let mut expected_footer = TieredStorageFooter { | ||
| account_meta_format: AccountMetaFormat::Hot, | ||
| owners_block_format: OwnersBlockFormat::AddressesOnly, | ||
| index_block_format: IndexBlockFormat::AddressesThenOffsets, | ||
|
|
@@ -331,12 +354,15 @@ mod tests { | |
| owner_entry_size: 32, | ||
| index_block_offset: 1069600, | ||
| owners_block_offset: 1081200, | ||
| hash: Hash::new_unique(), | ||
| min_account_address: Pubkey::default(), | ||
| max_account_address: Pubkey::new_unique(), | ||
| format_version: FOOTER_FORMAT_VERSION, | ||
| footer_size: FOOTER_SIZE as u64, | ||
| ..TieredStorageFooter::default() | ||
| }; | ||
| // Copy the original footer as the we will update the hash field of | ||
| // the expected footer. | ||
| let raw_footer = expected_footer; | ||
|
|
||
| // Persist the expected footer. | ||
| { | ||
|
|
@@ -349,6 +375,29 @@ mod tests { | |
| { | ||
| let footer = TieredStorageFooter::new_from_path(&path.path).unwrap(); | ||
| assert_eq!(expected_footer, footer); | ||
|
|
||
| // Expect everything is the same as the original footer except the hash | ||
| assert_eq!(footer.account_meta_format, raw_footer.account_meta_format); | ||
| assert_eq!(footer.owners_block_format, raw_footer.owners_block_format); | ||
| assert_eq!(footer.index_block_format, raw_footer.index_block_format); | ||
| assert_eq!(footer.account_block_format, raw_footer.account_block_format); | ||
| assert_eq!(footer.account_entry_count, raw_footer.account_entry_count); | ||
| assert_eq!( | ||
| footer.account_meta_entry_size, | ||
| raw_footer.account_meta_entry_size | ||
| ); | ||
| assert_eq!(footer.account_block_size, raw_footer.account_block_size); | ||
| assert_eq!(footer.owner_count, raw_footer.owner_count); | ||
| assert_eq!(footer.owner_entry_size, raw_footer.owner_entry_size); | ||
| assert_eq!(footer.index_block_offset, raw_footer.index_block_offset); | ||
| assert_eq!(footer.owners_block_offset, raw_footer.owners_block_offset); | ||
| assert_eq!(footer.min_account_address, raw_footer.min_account_address); | ||
| assert_eq!(footer.max_account_address, raw_footer.max_account_address); | ||
| assert_eq!(footer.format_version, raw_footer.format_version); | ||
| assert_eq!(footer.footer_size, raw_footer.footer_size); | ||
|
|
||
| // Expect hash mismatch as the footer should include an updated hash | ||
| assert_ne!(footer.hash, raw_footer.hash); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this error message be updated to indicate which one was read from the file and which one was computed?