Skip to content

Commit

Permalink
Check for nested files when calculating the total
Browse files Browse the repository at this point in the history
Fixes #12
  • Loading branch information
marcospb19 committed May 2, 2021
1 parent 964ddfd commit 2197511
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 2 deletions.
93 changes: 93 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use number_prefix::NumberPrefix;
use walkdir::WalkDir;

use std::{
collections::BTreeMap,
fmt::Display,
path::{Path, PathBuf},
};
Expand Down Expand Up @@ -91,3 +92,95 @@ pub fn format_size(size: u64, binary: bool) -> String {
NumberPrefix::Prefixed(prefix, number) => format!("{:.2} {}B", number, prefix),
}
}

/// Calculate the sum of sizes of all entries
///
/// Does not account twice the size of entries that are within other entries.
/// Implementation uses BTreeMap to build a path Trie.
pub fn calculate_unique_total_size(entries: &[Entry]) -> u64 {
let mut filtered_entries = Vec::<&Entry>::new();
let mut canonicalized_paths = Vec::<PathBuf>::new();

// Canonicalize each path, silently ignoring failures.
// TODO: Review if we should ignore failures.
for entry in entries {
if let Ok(path) = entry.path.canonicalize() {
filtered_entries.push(entry);
canonicalized_paths.push(path);
}
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
struct TriePathNode {
// Children nodes of this current path, accessed by path.
children: BTreeMap<PathBuf, TriePathNode>,
// Size of the file that ends at this node.
node_size: u64,
}

let mut trie_root = TriePathNode {
children: BTreeMap::new(),
node_size: 0,
};

// For each entry/path, add it to the Trie if it wasn't already inserted.
//
// If the Trie receives a folder that is parent of a previously added file, then just consider
// the parent folder, removing the childs, this way, we do not count them twice towards the
// final total.
for (i, entry) in filtered_entries.iter().enumerate() {
let path = &canonicalized_paths[i];

// Necessary because we need to check when it's the last path piece.
let mut path_iter = path.iter().peekable();
// Pointer to traverse the tree.
let mut current_trie_node = &mut trie_root;
// Size to be added at the endif the current entry isn't children of any other.
let size_of_current_file = entry.size;

while let Some(piece) = path_iter.next() {
// Query for the node in the Trie which matches the current path piece.
let entry = current_trie_node.children.entry(PathBuf::from(piece));

// Keeps track if the current entry is child of another previously found.
let mut already_considered = false;
let next_trie_node = entry
.and_modify(|_| {
// If we are in this block, it means that the node size was already considered
// because a parent of it was inserted. So we will skip this file.
already_considered = true;
})
// Add a node with 0 size, which may be changed after if it is the last piece.
.or_insert(TriePathNode {
children: BTreeMap::new(),
node_size: 0,
});

// Skipping already accounted file, because it is nested inside of another one.
if already_considered {
break;
}

// If we are at the last piece of the current entry path, it means that this is the tip
// that finally represents the file, and which path is the full file path.
let is_the_last_piece = path_iter.peek().is_none();
if is_the_last_piece {
// Update the size of this piece.
next_trie_node.node_size = size_of_current_file;
// Drop all the childrens so that their sizes won't be added.
next_trie_node.children.clear();
}

// Update the pointer to keep traversing the trie.
current_trie_node = next_trie_node;
}
}

fn trie_recursive_sum(node: &TriePathNode) -> u64 {
let children_sum: u64 = node.children.values().map(trie_recursive_sum).sum();
node.node_size + children_sum
}

// Traverse the trie tree to calculate the sum
trie_recursive_sum(&trie_root)
}
4 changes: 2 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use wild;

mod cli;
use cli::Cli;
use durt::{format_size, Entry};
use durt::{calculate_unique_total_size, format_size, Entry};

fn main() {
#[cfg(windows)]
Expand Down Expand Up @@ -60,7 +60,7 @@ fn main() {
Table::new(" {:>} {:<}")
};

let total_size = entries.iter().map(|e| e.size).sum();
let total_size = calculate_unique_total_size(&entries);
let mut omitted_entries = 0;

for entry in entries {
Expand Down

0 comments on commit 2197511

Please sign in to comment.