[2.x.x] simplify txhashset zip creation and extraction (#2908)
* simplify how we extract files from the txhashset zip archive

* bump version of zip-rs
cleanup create_zip (was compress)
use explicit list of files when creating zip archive

* rework zip tests for our simplified impl

* periodic debug during txhashset download

* rework list_files to return something usable
antiochp authored Jul 12, 2019
1 parent 3a47526 commit 1395074
Showing 9 changed files with 224 additions and 392 deletions.
19 changes: 4 additions & 15 deletions Cargo.lock

Some generated files are not rendered by default.

180 changes: 43 additions & 137 deletions chain/src/txhashset/txhashset.rs
@@ -32,8 +32,7 @@ use crate::util::secp::pedersen::{Commitment, RangeProof};
use crate::util::{file, secp_static, zip};
use croaring::Bitmap;
use grin_store;
use grin_store::pmmr::{clean_files_by_prefix, PMMRBackend, PMMR_FILES};
use std::collections::HashSet;
use grin_store::pmmr::{clean_files_by_prefix, PMMRBackend};
use std::fs::{self, File};
use std::path::{Path, PathBuf};
use std::sync::Arc;
@@ -1457,11 +1456,13 @@ pub fn zip_read(root_dir: String, header: &BlockHeader) -> Result<File, Error> {
}
// Copy file to another dir
file::copy_dir_to(&txhashset_path, &temp_txhashset_path)?;
// Check and remove file that are not supposed to be there
check_and_remove_files(&temp_txhashset_path, header)?;
// Compress zip
zip::compress(&temp_txhashset_path, &File::create(zip_path.clone())?)
.map_err(|ze| ErrorKind::Other(ze.to_string()))?;

let zip_file = File::create(zip_path.clone())?;

// Explicit list of files to add to our zip archive.
let files = file_list(header);

zip::create_zip(&zip_file, &temp_txhashset_path, files)?;

temp_txhashset_path
};
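The `zip::create_zip` call above replaces the old `zip::compress` plus `check_and_remove_files` pair: instead of zipping a whole directory and pruning unexpected files afterwards, only the explicitly listed files ever make it into the archive. As a rough idea of what a helper along those lines could look like, here is a minimal sketch built on the zip-rs `ZipWriter` API; the name, signature, and error handling are assumptions for illustration, not the project's actual util implementation:

```rust
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};

use zip::result::ZipResult;
use zip::write::FileOptions;
use zip::ZipWriter;

/// Build a zip archive at `dst_file` containing only the listed `files`,
/// each resolved relative to `src_dir`. Files missing on disk are simply
/// skipped (an assumption in this sketch; the real helper may differ).
fn create_zip(dst_file: &File, src_dir: &Path, files: Vec<PathBuf>) -> ZipResult<()> {
    let mut writer = ZipWriter::new(dst_file);
    for rel_path in files {
        if let Ok(mut src) = File::open(src_dir.join(&rel_path)) {
            // Zip entry names use forward slashes regardless of OS.
            let name = rel_path.to_string_lossy().replace('\\', "/");
            writer.start_file(name, FileOptions::default())?;
            io::copy(&mut src, &mut writer)?;
        }
    }
    writer.finish()?;
    Ok(())
}
```

Whatever the exact implementation, the explicit list means stray temp files under the txhashset dir can never leak into the archive a peer downloads.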
@@ -1480,6 +1481,30 @@ pub fn zip_read(root_dir: String, header: &BlockHeader) -> Result<File, Error> {
Ok(zip_file)
}

// Explicit list of files to extract from our zip archive.
// We include *only* these files when building the txhashset zip.
// We extract *only* these files when receiving a txhashset zip.
// Everything else will be safely ignored.
// Return Vec<PathBuf> as some of these are dynamic (specifically the "rewound" leaf files).
fn file_list(header: &BlockHeader) -> Vec<PathBuf> {
vec![
// kernel MMR
PathBuf::from("kernel/pmmr_data.bin"),
PathBuf::from("kernel/pmmr_hash.bin"),
// output MMR
PathBuf::from("output/pmmr_data.bin"),
PathBuf::from("output/pmmr_hash.bin"),
PathBuf::from("output/pmmr_prun.bin"),
// rangeproof MMR
PathBuf::from("rangeproof/pmmr_data.bin"),
PathBuf::from("rangeproof/pmmr_hash.bin"),
PathBuf::from("rangeproof/pmmr_prun.bin"),
// Header specific "rewound" leaf files for output and rangeproof MMR.
PathBuf::from(format!("output/pmmr_leaf.bin.{}", header.hash())),
PathBuf::from(format!("rangeproof/pmmr_leaf.bin.{}", header.hash())),
]
}

/// Extract the txhashset data from a zip file and writes the content into the
/// txhashset storage dir
pub fn zip_write(
@@ -1489,10 +1514,17 @@ pub fn zip_write(
) -> Result<(), Error> {
debug!("zip_write on path: {:?}", root_dir);
let txhashset_path = root_dir.clone().join(TXHASHSET_SUBDIR);
fs::create_dir_all(txhashset_path.clone())?;
zip::decompress(txhashset_data, &txhashset_path, expected_file)
.map_err(|ze| ErrorKind::Other(ze.to_string()))?;
check_and_remove_files(&txhashset_path, header)
fs::create_dir_all(&txhashset_path)?;

// Explicit list of files to extract from our zip archive.
let files = file_list(header);

// We expect to see *exactly* the paths listed above.
// No attempt is made to be permissive or forgiving with "alternative" paths.
// These are the *only* files we will attempt to extract from the zip file.
// If any of these are missing we will attempt to continue as some are potentially optional.
zip::extract_files(txhashset_data, &txhashset_path, files)?;
Ok(())
}
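On the receiving side the same explicit list drives extraction: only the named entries are pulled out of the archive, and anything else the zip may contain is ignored, as the comments above state. A minimal sketch of an `extract_files`-style helper using the zip-rs `ZipArchive` API, again illustrative rather than the actual util code, with missing entries skipped to mirror the "potentially optional" comment:

```rust
use std::fs::{self, File};
use std::io::{self, Read, Seek};
use std::path::{Path, PathBuf};

use zip::result::ZipResult;
use zip::ZipArchive;

/// Extract only the listed `files` from the archive into `dest`; any other
/// entries in the archive are ignored, and listed entries that are absent
/// are skipped rather than treated as errors.
fn extract_files<R: Read + Seek>(from_archive: R, dest: &Path, files: Vec<PathBuf>) -> ZipResult<()> {
    let mut archive = ZipArchive::new(from_archive)?;
    for rel_path in files {
        let name = rel_path.to_string_lossy().replace('\\', "/");
        let mut entry = match archive.by_name(&name) {
            Ok(entry) => entry,
            Err(_) => continue, // optional file not present in this archive
        };
        let out_path = dest.join(&rel_path);
        if let Some(parent) = out_path.parent() {
            fs::create_dir_all(parent)?;
        }
        io::copy(&mut entry, &mut File::create(&out_path)?)?;
    }
    Ok(())
}
```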

/// Overwrite txhashset folders in "to" folder with "from" folder
@@ -1537,112 +1569,6 @@ pub fn clean_header_folder(root_dir: &PathBuf) {
}
}

fn expected_file(path: &Path) -> bool {
use lazy_static::lazy_static;
use regex::Regex;
let s_path = path.to_str().unwrap_or_else(|| "");
lazy_static! {
static ref RE: Regex = Regex::new(
format!(
r#"^({}|{}|{})((/|\\)pmmr_(hash|data|leaf|prun)\.bin(\.\w*)?)?$"#,
OUTPUT_SUBDIR, KERNEL_SUBDIR, RANGE_PROOF_SUBDIR
)
.as_str()
)
.expect("invalid txhashset regular expression");
}
RE.is_match(&s_path)
}

/// Check a txhashset directory and remove any unexpected
fn check_and_remove_files(txhashset_path: &PathBuf, header: &BlockHeader) -> Result<(), Error> {
// First compare the subdirectories
let subdirectories_expected: HashSet<_> = [OUTPUT_SUBDIR, KERNEL_SUBDIR, RANGE_PROOF_SUBDIR]
.iter()
.cloned()
.map(|s| String::from(s))
.collect();

let subdirectories_found: HashSet<_> = fs::read_dir(txhashset_path)?
.filter_map(|entry| {
entry.ok().and_then(|e| {
e.path()
.file_name()
.and_then(|n| n.to_str().map(|s| String::from(s)))
})
})
.collect();

let dir_difference: Vec<String> = subdirectories_found
.difference(&subdirectories_expected)
.cloned()
.collect();

// Removing unexpected directories if needed
if !dir_difference.is_empty() {
debug!("Unexpected folder(s) found in txhashset folder, removing.");
for diff in dir_difference {
let diff_path = txhashset_path.join(diff);
file::delete(diff_path)?;
}
}

// Then compare the files found in the subdirectories
let pmmr_files_expected: HashSet<_> = PMMR_FILES
.iter()
.cloned()
.map(|s| {
if s.contains("pmmr_leaf.bin") {
format!("{}.{}", s, header.hash())
} else {
String::from(s)
}
})
.collect();

let subdirectories = fs::read_dir(txhashset_path)?;
for subdirectory in subdirectories {
let subdirectory_path = subdirectory?.path();
let pmmr_files = fs::read_dir(&subdirectory_path)?;
let pmmr_files_found: HashSet<_> = pmmr_files
.filter_map(|entry| {
entry.ok().and_then(|e| {
e.path()
.file_name()
.and_then(|n| n.to_str().map(|s| String::from(s)))
})
})
.collect();
let difference: Vec<String> = pmmr_files_found
.difference(&pmmr_files_expected)
.cloned()
.collect();
let mut removed = 0;
if !difference.is_empty() {
for diff in &difference {
let diff_path = subdirectory_path.join(diff);
match file::delete(diff_path.clone()) {
Err(e) => error!(
"check_and_remove_files: fail to remove file '{:?}', Err: {:?}",
diff_path, e,
),
Ok(_) => {
removed += 1;
trace!("check_and_remove_files: file '{:?}' removed", diff_path);
}
}
}
debug!(
"{} tmp file(s) found in txhashset subfolder {:?}, {} removed.",
difference.len(),
&subdirectory_path,
removed,
);
}
}
Ok(())
}

/// Given a block header to rewind to and the block header at the
/// head of the current chain state, we need to calculate the positions
/// of all inputs (spent outputs) we need to "undo" during a rewind.
@@ -1694,23 +1620,3 @@ pub fn input_pos_to_rewind(

bitmap_fast_or(None, &mut block_input_bitmaps).ok_or_else(|| ErrorKind::Bitmap.into())
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_expected_files() {
assert!(!expected_file(Path::new("kernels")));
assert!(!expected_file(Path::new("xkernel")));
assert!(expected_file(Path::new("kernel")));
assert!(expected_file(Path::new("kernel\\pmmr_data.bin")));
assert!(expected_file(Path::new("kernel/pmmr_hash.bin")));
assert!(expected_file(Path::new("kernel/pmmr_leaf.bin")));
assert!(expected_file(Path::new("kernel/pmmr_prun.bin")));
assert!(expected_file(Path::new("kernel/pmmr_leaf.bin.deadbeef")));
assert!(!expected_file(Path::new("xkernel/pmmr_data.bin")));
assert!(!expected_file(Path::new("kernel/pmmrx_data.bin")));
assert!(!expected_file(Path::new("kernel/pmmr_data.binx")));
}
}
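These regex-based `expected_file` tests go away along with the function they exercised; per the commit message the zip tests were reworked against the simplified impl (presumably in the zip util crate's tests, which are not shown in this file). As a rough idea only, a roundtrip test over an explicit file list might look like the following, reusing the hypothetical `create_zip`/`extract_files` sketches above and assuming the `tempfile` crate as a dev-dependency:

```rust
#[cfg(test)]
mod zip_roundtrip_tests {
    use super::*;
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::PathBuf;

    #[test]
    fn create_then_extract_only_listed_files() {
        let src_dir = tempfile::tempdir().unwrap();
        let work_dir = tempfile::tempdir().unwrap();

        // One file we list explicitly and one stray file we expect to be ignored.
        fs::create_dir_all(src_dir.path().join("kernel")).unwrap();
        File::create(src_dir.path().join("kernel/pmmr_data.bin"))
            .unwrap()
            .write_all(b"data")
            .unwrap();
        File::create(src_dir.path().join("stray.bin")).unwrap();

        let files = vec![PathBuf::from("kernel/pmmr_data.bin")];
        let zip_path = work_dir.path().join("txhashset.zip");

        let zip_file = File::create(&zip_path).unwrap();
        create_zip(&zip_file, src_dir.path(), files.clone()).unwrap();

        let out_dir = work_dir.path().join("out");
        extract_files(File::open(&zip_path).unwrap(), &out_dir, files).unwrap();

        // Only the listed file should survive the roundtrip.
        assert!(out_dir.join("kernel/pmmr_data.bin").exists());
        assert!(!out_dir.join("stray.bin").exists());
    }
}
```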