@@ -3,6 +3,7 @@ use number_prefix::NumberPrefix;
3
3
use walkdir:: WalkDir ;
4
4
5
5
use std:: {
6
+ collections:: BTreeMap ,
6
7
fmt:: Display ,
7
8
path:: { Path , PathBuf } ,
8
9
} ;
@@ -31,6 +32,7 @@ impl<T, E: Display> ResultExt<T, E> for Result<T, E> {
31
32
}
32
33
}
33
34
35
+ #[ derive( PartialEq , Eq , PartialOrd , Ord ) ]
34
36
pub struct Entry {
35
37
pub path : PathBuf ,
36
38
pub size : u64 ,
@@ -91,3 +93,105 @@ pub fn format_size(size: u64, binary: bool) -> String {
91
93
NumberPrefix :: Prefixed ( prefix, number) => format ! ( "{:.2} {}B" , number, prefix) ,
92
94
}
93
95
}
96
+
97
+ /// Calculate the sum of sizes of all entries
98
+ ///
99
+ /// Ignore nested files when calculating the total
100
+ ///
101
+ /// For the nested files:
102
+ /// - `folder/ (5 MB)`
103
+ /// - `folder/big_file (15 MB)`
104
+ ///
105
+ /// The is 15 MB instead of 20 MB because the inner file is inside of the
106
+ /// folder that was also received as an argument
107
+ ///
108
+ /// Implemented with the Trie data structure, made of HashMap and PathBufs
109
+ /// that represent each path components of the canonicalized file paths
110
+ pub fn calculate_unique_total_size ( entries : & [ Entry ] ) -> u64 {
111
+ // Sorted to guarantee that files can only come after their parent directories
112
+ let sorted_entries = {
113
+ let mut entries_and_paths: Vec < ( PathBuf , & Entry ) > = vec ! [ ] ;
114
+
115
+ // Canonicalize each path, reporting and skipping errors
116
+ for entry in entries {
117
+ let canonical_path = entry. path . canonicalize ( ) . log_err ( Some ( & entry. path ) ) ;
118
+ if let Ok ( path) = canonical_path {
119
+ entries_and_paths. push ( ( path, entry) ) ;
120
+ }
121
+ }
122
+ entries_and_paths. sort_unstable ( ) ;
123
+ entries_and_paths
124
+ } ;
125
+
126
+ #[ derive( PartialEq , Eq , PartialOrd , Ord ) ]
127
+ struct TriePathNode {
128
+ // Children nodes of this current path, accessed by path
129
+ children : BTreeMap < PathBuf , TriePathNode > ,
130
+ // Size of the file that ends at this node
131
+ node_size : u64 ,
132
+ }
133
+
134
+ let mut trie_root = TriePathNode {
135
+ children : BTreeMap :: new ( ) ,
136
+ node_size : 0 ,
137
+ } ;
138
+
139
+ // For each entry/path, add it to the Trie if it wasn't already inserted
140
+ //
141
+ // If the Trie receives a folder that is parent of a previously added file, then just consider
142
+ // the parent folder, removing the childs, this way, we do not count them twice towards the
143
+ // final total
144
+ for ( path, entry) in sorted_entries {
145
+ // Necessary because we need to check when it's the last path piece
146
+ let mut path_iter = path. iter ( ) . peekable ( ) ;
147
+ // Pointer to traverse the tree
148
+ let mut current_trie_node = & mut trie_root;
149
+ // Size to be added at the endif the current entry isn't children of any other
150
+ let size_of_current_file = entry. size ;
151
+
152
+ while let Some ( piece) = path_iter. next ( ) {
153
+ // Query for the node in the Trie which matches the current path piece
154
+ let entry = current_trie_node. children . entry ( PathBuf :: from ( piece) ) ;
155
+
156
+ // Keeps track if the current entry is child of another previously found
157
+ let mut already_considered = false ;
158
+ let next_trie_node = entry
159
+ . and_modify ( |_| {
160
+ // If we are in this block, it means that the node size was already considered
161
+ // because a parent of it was inserted. So we will skip this file
162
+ already_considered = true ;
163
+ } )
164
+ // Add a node with 0 size, which may be changed after if it is the last piece
165
+ . or_insert ( TriePathNode {
166
+ children : BTreeMap :: new ( ) ,
167
+ node_size : 0 ,
168
+ } ) ;
169
+
170
+ // Skipping already accounted file, because it is nested inside of another one
171
+ if already_considered {
172
+ break ;
173
+ }
174
+
175
+ // If we are at the last piece of the current entry path, it means that this is the tip
176
+ // that finally represents the file, and which path is the full file path
177
+ let is_the_last_piece = path_iter. peek ( ) . is_none ( ) ;
178
+ if is_the_last_piece {
179
+ // Update the size of this piece
180
+ next_trie_node. node_size = size_of_current_file;
181
+ // Drop all the childrens so that their sizes won't be added
182
+ next_trie_node. children . clear ( ) ;
183
+ }
184
+
185
+ // Update the pointer to keep traversing the trie
186
+ current_trie_node = next_trie_node;
187
+ }
188
+ }
189
+
190
+ fn trie_recursive_sum ( node : & TriePathNode ) -> u64 {
191
+ let children_sum: u64 = node. children . values ( ) . map ( trie_recursive_sum) . sum ( ) ;
192
+ node. node_size + children_sum
193
+ }
194
+
195
+ // Traverse the trie tree to calculate the sum
196
+ trie_recursive_sum ( & trie_root)
197
+ }
0 commit comments