Skip to content

Commit

Permalink
[1.1.0] Support variable size PMMR (optional size_file) (#2734)
Browse files Browse the repository at this point in the history
* Introduce optional size_file.
Support fixed size data file via an optional elmt_size.
Support variable size data file via optional size_file.

* remember to release the size_file

* fix scoping for windows support
  • Loading branch information
antiochp authored and ignopeverell committed Apr 12, 2019
1 parent 9e8210c commit 3d817f6
Show file tree
Hide file tree
Showing 8 changed files with 522 additions and 252 deletions.
14 changes: 9 additions & 5 deletions chain/src/txhashset/txhashset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,15 @@ impl<T: PMMRable> PMMRHandle<T> {
sub_dir: &str,
file_name: &str,
prunable: bool,
fixed_size: bool,
header: Option<&BlockHeader>,
) -> Result<PMMRHandle<T>, Error> {
let path = Path::new(root_dir).join(sub_dir).join(file_name);
fs::create_dir_all(path.clone())?;
let path_str = path.to_str().ok_or(Error::from(ErrorKind::Other(
"invalid file path".to_owned(),
)))?;
let backend = PMMRBackend::new(path_str.to_string(), prunable, header)?;
let backend = PMMRBackend::new(path_str.to_string(), prunable, fixed_size, header)?;
let last_pos = backend.unpruned_size();
Ok(PMMRHandle { backend, last_pos })
}
Expand Down Expand Up @@ -121,34 +122,39 @@ impl TxHashSet {
HEADERHASHSET_SUBDIR,
HEADER_HEAD_SUBDIR,
false,
true,
None,
)?,
sync_pmmr_h: PMMRHandle::new(
&root_dir,
HEADERHASHSET_SUBDIR,
SYNC_HEAD_SUBDIR,
false,
true,
None,
)?,
output_pmmr_h: PMMRHandle::new(
&root_dir,
TXHASHSET_SUBDIR,
OUTPUT_SUBDIR,
true,
true,
header,
)?,
rproof_pmmr_h: PMMRHandle::new(
&root_dir,
TXHASHSET_SUBDIR,
RANGE_PROOF_SUBDIR,
true,
true,
header,
)?,
kernel_pmmr_h: PMMRHandle::new(
&root_dir,
TXHASHSET_SUBDIR,
KERNEL_SUBDIR,
false,
false, // not prunable
false, // variable size kernel data file
None,
)?,
commit_index,
Expand Down Expand Up @@ -696,9 +702,7 @@ impl<'a> HeaderExtension<'a> {
/// including the genesis block header.
/// Rewind the header extension all the way back to the empty (pos 0) state.
/// NOTE(review): this span appears to be interleaved old/new lines from a diff —
/// the explicit `rewind(0, ...)` call below is followed by `self.pmmr.truncate()`,
/// which presumably performs the same reset internally; confirm against the
/// upstream file that only the `truncate()` call should remain.
pub fn truncate(&mut self) -> Result<(), Error> {
debug!("Truncating header extension.");
// Old form: rewind to position 0 with an empty removal bitmap,
// mapping any backend error into the txhashset error kind.
self.pmmr
.rewind(0, &Bitmap::create())
.map_err(&ErrorKind::TxHashSetErr)?;
// New form: delegate the reset to the PMMR's own truncate().
self.pmmr.truncate().map_err(&ErrorKind::TxHashSetErr)?;
Ok(())
}

Expand Down
1 change: 1 addition & 0 deletions core/src/core/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ impl fmt::Display for Error {
/// Header entry for storing in the header MMR.
/// Note: we hash the block header itself and maintain the hash in the entry.
/// This allows us to lookup the original header from the db as necessary.
#[derive(Debug)]
pub struct HeaderEntry {
hash: Hash,
timestamp: u64,
Expand Down
7 changes: 7 additions & 0 deletions core/src/core/pmmr/pmmr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ where
Ok(())
}

/// Reset the MMR to its empty state by rewinding the backend to position 0
/// and clearing the cached last position.
pub fn truncate(&mut self) -> Result<(), String> {
    // Rewinding to pos 0 with an empty removal bitmap discards all elements.
    let nothing_removed = Bitmap::create();
    self.backend.rewind(0, &nothing_removed)?;
    self.last_pos = 0;
    Ok(())
}

/// Rewind the PMMR to a previous position, as if all push operations after
/// that had been canceled. Expects a position in the PMMR to rewind and
/// bitmaps representing the positions added and removed that we want to
Expand Down
2 changes: 1 addition & 1 deletion core/src/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ pub trait FixedLength {
pub trait PMMRable: Writeable + Clone + Debug + DefaultHashable {
/// The type of element actually stored in the MMR data file.
/// This allows us to store Hash elements in the header MMR for variable size BlockHeaders.
type E: FixedLength + Readable + Writeable;
type E: FixedLength + Readable + Writeable + Debug;

/// Convert the pmmrable into the element to be stored in the MMR data file.
fn as_elmt(&self) -> Self::E;
Expand Down
86 changes: 47 additions & 39 deletions store/src/pmmr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use std::{fs, io, time};
use crate::core::core::hash::{Hash, Hashed};
use crate::core::core::pmmr::{self, family, Backend};
use crate::core::core::BlockHeader;
use crate::core::ser::PMMRable;
use crate::core::ser::{FixedLength, PMMRable};
use crate::leaf_set::LeafSet;
use crate::prune_list::PruneList;
use crate::types::DataFile;
Expand All @@ -29,6 +29,7 @@ const PMMR_HASH_FILE: &str = "pmmr_hash.bin";
const PMMR_DATA_FILE: &str = "pmmr_data.bin";
const PMMR_LEAF_FILE: &str = "pmmr_leaf.bin";
const PMMR_PRUN_FILE: &str = "pmmr_prun.bin";
const PMMR_SIZE_FILE: &str = "pmmr_size.bin";
const REWIND_FILE_CLEANUP_DURATION_SECONDS: u64 = 60 * 60 * 24; // 24 hours as seconds

/// The list of PMMR_Files for internal purposes
Expand Down Expand Up @@ -64,13 +65,8 @@ impl<T: PMMRable> Backend<T> for PMMRBackend<T> {
/// Add the new leaf pos to our leaf_set if this is a prunable MMR.
#[allow(unused_variables)]
fn append(&mut self, data: &T, hashes: Vec<Hash>) -> Result<(), String> {
if self.prunable {
let shift = self.prune_list.get_total_shift();
let position = self.hash_file.size_unsync() + shift + 1;
self.leaf_set.add(position);
}

self.data_file
let size = self
.data_file
.append(&data.as_elmt())
.map_err(|e| format!("Failed to append data to file. {}", e))?;

Expand All @@ -79,6 +75,14 @@ impl<T: PMMRable> Backend<T> for PMMRBackend<T> {
.append(h)
.map_err(|e| format!("Failed to append hash to file. {}", e))?;
}

if self.prunable {
// (Re)calculate the latest pos given updated size of data file
// and the total leaf_shift, and add to our leaf_set.
let pos = pmmr::insertion_to_pmmr_index(size + self.prune_list.get_total_leaf_shift());
self.leaf_set.add(pos);
}

Ok(())
}

Expand All @@ -91,6 +95,9 @@ impl<T: PMMRable> Backend<T> for PMMRBackend<T> {
}

fn get_data_from_file(&self, position: u64) -> Option<T::E> {
if !pmmr::is_leaf(position) {
return None;
}
if self.is_compacted(position) {
return None;
}
Expand Down Expand Up @@ -194,11 +201,26 @@ impl<T: PMMRable> PMMRBackend<T> {
pub fn new<P: AsRef<Path>>(
data_dir: P,
prunable: bool,
fixed_size: bool,
header: Option<&BlockHeader>,
) -> io::Result<PMMRBackend<T>> {
let data_dir = data_dir.as_ref();
let hash_file = DataFile::open(&data_dir.join(PMMR_HASH_FILE))?;
let data_file = DataFile::open(&data_dir.join(PMMR_DATA_FILE))?;

// We either have a fixed size *or* a path to a file for tracking sizes.
let (elmt_size, size_path) = if fixed_size {
(Some(T::E::LEN as u16), None)
} else {
(None, Some(data_dir.join(PMMR_SIZE_FILE)))
};

// Hash file is always "fixed size" and we use 32 bytes per hash.
let hash_file =
DataFile::open(&data_dir.join(PMMR_HASH_FILE), None, Some(Hash::LEN as u16))?;
let data_file = DataFile::open(
&data_dir.join(PMMR_DATA_FILE),
size_path.as_ref(),
elmt_size,
)?;

let leaf_set_path = data_dir.join(PMMR_LEAF_FILE);

Expand All @@ -219,8 +241,8 @@ impl<T: PMMRable> PMMRBackend<T> {
Ok(PMMRBackend {
data_dir: data_dir.to_path_buf(),
prunable,
hash_file: hash_file,
data_file: data_file,
hash_file,
data_file,
leaf_set,
prune_list,
})
Expand All @@ -238,12 +260,10 @@ impl<T: PMMRable> PMMRBackend<T> {
self.is_pruned(pos) && !self.is_pruned_root(pos)
}

/// Number of elements in the PMMR stored by this backend. Only produces the
/// Number of hashes in the PMMR stored by this backend. Only produces the
/// fully sync'd size.
pub fn unpruned_size(&self) -> u64 {
let total_shift = self.prune_list.get_total_shift();
let sz = self.hash_file.size();
sz + total_shift
self.hash_size() + self.prune_list.get_total_shift()
}

/// Number of elements in the underlying stored data. Extremely dependent on
Expand All @@ -261,14 +281,14 @@ impl<T: PMMRable> PMMRBackend<T> {
/// Syncs all files to disk. A call to sync is required to ensure all the
/// data has been successfully written to disk.
pub fn sync(&mut self) -> io::Result<()> {
self.hash_file
.flush()
Ok(())
.and(self.hash_file.flush())
.and(self.data_file.flush())
.and(self.leaf_set.flush())
.map_err(|e| {
io::Error::new(
io::ErrorKind::Interrupted,
format!("Could not write to state storage, disk full? {:?}", e),
format!("Could not sync pmmr to disk: {:?}", e),
)
})
}
Expand All @@ -292,24 +312,18 @@ impl<T: PMMRable> PMMRBackend<T> {
pub fn check_compact(&mut self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> io::Result<bool> {
assert!(self.prunable, "Trying to compact a non-prunable PMMR");

// Paths for tmp hash and data files.
let tmp_prune_file_hash =
format!("{}.hashprune", self.data_dir.join(PMMR_HASH_FILE).display());
let tmp_prune_file_data =
format!("{}.dataprune", self.data_dir.join(PMMR_DATA_FILE).display());
// Calculate the sets of leaf positions and node positions to remove based
// on the cutoff_pos provided.
let (leaves_removed, pos_to_rm) = self.pos_to_rm(cutoff_pos, rewind_rm_pos);

// 1. Save compact copy of the hash file, skipping removed data.
{
let off_to_rm = map_vec!(pos_to_rm, |pos| {
let pos_to_rm = map_vec!(pos_to_rm, |pos| {
let shift = self.prune_list.get_shift(pos.into());
pos as u64 - 1 - shift
pos as u64 - shift
});

self.hash_file
.save_prune(&tmp_prune_file_hash, &off_to_rm)?;
self.hash_file.save_prune(&pos_to_rm)?;
}

// 2. Save compact copy of the data file, skipping removed leaves.
Expand All @@ -320,14 +334,13 @@ impl<T: PMMRable> PMMRBackend<T> {
.map(|x| x as u64)
.collect::<Vec<_>>();

let off_to_rm = map_vec!(leaf_pos_to_rm, |&pos| {
let pos_to_rm = map_vec!(leaf_pos_to_rm, |&pos| {
let flat_pos = pmmr::n_leaves(pos);
let shift = self.prune_list.get_leaf_shift(pos);
(flat_pos - 1 - shift)
flat_pos - shift
});

self.data_file
.save_prune(&tmp_prune_file_data, &off_to_rm)?;
self.data_file.save_prune(&pos_to_rm)?;
}

// 3. Update the prune list and write to disk.
Expand All @@ -337,17 +350,12 @@ impl<T: PMMRable> PMMRBackend<T> {
}
self.prune_list.flush()?;
}
// 4. Rename the compact copy of hash file and reopen it.
self.hash_file.replace(Path::new(&tmp_prune_file_hash))?;

// 5. Rename the compact copy of the data file and reopen it.
self.data_file.replace(Path::new(&tmp_prune_file_data))?;

// 6. Write the leaf_set to disk.
// 4. Write the leaf_set to disk.
// Optimize the bitmap storage in the process.
self.leaf_set.flush()?;

// 7. cleanup rewind files
// 5. cleanup rewind files
self.clean_rewind_files()?;

Ok(true)
Expand Down
7 changes: 7 additions & 0 deletions store/src/prune_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,17 @@ impl PruneList {
}

/// Total shift accumulated over every entry in the prune_list.
/// This is the shift that must be accounted for when appending new
/// entries to our PMMR.
pub fn get_total_shift(&self) -> u64 {
    // The highest pruned position bounds the whole prune_list, so the
    // shift at that position is the total shift.
    let last_pruned_pos = self.bitmap.maximum() as u64;
    self.get_shift(last_pruned_pos)
}

/// Total leaf_shift accumulated over every entry in the prune_list.
/// This is the leaf_shift that must be accounted for when appending new
/// entries to our PMMR.
pub fn get_total_leaf_shift(&self) -> u64 {
    // Evaluating the leaf shift at the maximum pruned position yields
    // the total across the entire prune_list.
    let last_pruned_pos = self.bitmap.maximum() as u64;
    self.get_leaf_shift(last_pruned_pos)
}

/// Computes by how many positions a node at pos should be shifted given the
/// number of nodes that have already been pruned before it.
/// Note: the node at pos may be pruned and may be compacted away itself and
Expand Down
Loading

0 comments on commit 3d817f6

Please sign in to comment.