@@ -17,216 +17,14 @@ pub mod traverse;
1717///
1818pub mod from_offsets;
1919
20- /// An item stored within the [`Tree`] whose data is stored in a pack file, identified by
21- /// the offset of its first (`offset`) and last (`next_offset`) bytes.
22- ///
23- /// It represents either a root entry, or one that relies on a base to be resolvable,
24- /// alongside associated `data` `T`.
25- pub struct Item < T > {
26- /// The offset into the pack file at which the pack entry's data is located.
27- pub offset : crate :: data:: Offset ,
28- /// The offset of the next item in the pack file.
29- pub next_offset : crate :: data:: Offset ,
30- /// Data to store with each Item, effectively data associated with each entry in a pack.
31- pub data : T ,
32- /// Indices into our Tree's `items`, one for each pack entry that depends on us.
33- ///
34- /// Limited to u32 as that's the maximum amount of objects in a pack.
35- // SAFETY INVARIANT:
36- // - only one Item in a tree may have any given child index. `future_child_offsets`
37- // should also not contain any indices found in `children`.\
38- // - These indices should be in bounds for tree.child_items
39- children : Vec < u32 > ,
40- }
41-
42- /// Identify what kind of node we have last seen
43- enum NodeKind {
44- Root ,
45- Child ,
46- }
47-
48- /// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
49- /// while being shareable among threads without a lock.
50- /// It does this by making the guarantee that iteration only happens once.
51- pub struct Tree < T > {
52- /// The root nodes, i.e. base objects
53- // SAFETY invariant: see Item.children
54- root_items : Vec < Item < T > > ,
55- /// The child nodes, i.e. those that rely a base object, like ref and ofs delta objects
56- // SAFETY invariant: see Item.children
57- child_items : Vec < Item < T > > ,
58- /// The last encountered node was either a root or a child.
59- last_seen : Option < NodeKind > ,
60- /// Future child offsets, associating their offset into the pack with their index in the items array.
61- /// (parent_offset, child_index)
62- // SAFETY invariant:
63- // - None of these child indices should already have parents
64- // i.e. future_child_offsets[i].1 should never be also found
65- // in Item.children. Indices should be found here at most once.
66- // - These indices should be in bounds for tree.child_items.
67- future_child_offsets : Vec < ( crate :: data:: Offset , usize ) > ,
68- }
69-
70- impl < T > Tree < T > {
71- /// Instantiate a empty tree capable of storing `num_objects` amounts of items.
72- pub fn with_capacity ( num_objects : usize ) -> Result < Self , Error > {
73- Ok ( Tree {
74- root_items : Vec :: with_capacity ( num_objects / 2 ) ,
75- child_items : Vec :: with_capacity ( num_objects / 2 ) ,
76- last_seen : None ,
77- future_child_offsets : Vec :: new ( ) ,
78- } )
79- }
80-
81- fn num_items ( & self ) -> usize {
82- self . root_items . len ( ) + self . child_items . len ( )
83- }
84-
85- fn assert_is_incrementing_and_update_next_offset ( & mut self , offset : crate :: data:: Offset ) -> Result < ( ) , Error > {
86- let items = match & self . last_seen {
87- Some ( NodeKind :: Root ) => & mut self . root_items ,
88- Some ( NodeKind :: Child ) => & mut self . child_items ,
89- None => return Ok ( ( ) ) ,
90- } ;
91- let item = & mut items. last_mut ( ) . expect ( "last seen won't lie" ) ;
92- if offset <= item. offset {
93- return Err ( Error :: InvariantIncreasingPackOffset {
94- last_pack_offset : item. offset ,
95- pack_offset : offset,
96- } ) ;
97- }
98- item. next_offset = offset;
99- Ok ( ( ) )
100- }
101-
102- fn set_pack_entries_end_and_resolve_ref_offsets (
103- & mut self ,
104- pack_entries_end : crate :: data:: Offset ,
105- ) -> Result < ( ) , traverse:: Error > {
106- if !self . future_child_offsets . is_empty ( ) {
107- for ( parent_offset, child_index) in self . future_child_offsets . drain ( ..) {
108- // SAFETY invariants upheld:
109- // - We are draining from future_child_offsets and adding to children, keeping things the same.
110- // - We can rely on the `future_child_offsets` invariant to be sure that `children` is
111- // not getting any indices that are already in use in `children` elsewhere
112- // - The indices are in bounds for child_items since they were in bounds for future_child_offsets,
113- // we can carry over the invariant.
114- if let Ok ( i) = self . child_items . binary_search_by_key ( & parent_offset, |i| i. offset ) {
115- self . child_items [ i] . children . push ( child_index as u32 ) ;
116- } else if let Ok ( i) = self . root_items . binary_search_by_key ( & parent_offset, |i| i. offset ) {
117- self . root_items [ i] . children . push ( child_index as u32 ) ;
118- } else {
119- return Err ( traverse:: Error :: OutOfPackRefDelta {
120- base_pack_offset : parent_offset,
121- } ) ;
122- }
123- }
124- }
125-
126- self . assert_is_incrementing_and_update_next_offset ( pack_entries_end)
127- . expect ( "BUG: pack now is smaller than all previously seen entries" ) ;
128- Ok ( ( ) )
129- }
20+ /// Tree datastructure
21+ // kept in separate module to encapsulate unsafety (it has field invariants)
22+ mod tree;
13023
131- /// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
132- /// custom `data` with it.
133- pub fn add_root ( & mut self , offset : crate :: data:: Offset , data : T ) -> Result < ( ) , Error > {
134- self . assert_is_incrementing_and_update_next_offset ( offset) ?;
135- self . last_seen = NodeKind :: Root . into ( ) ;
136- self . root_items . push ( Item {
137- offset,
138- next_offset : 0 ,
139- data,
140- // SAFETY INVARIANT upheld: there are no children
141- children : Default :: default ( ) ,
142- } ) ;
143- Ok ( ( ) )
144- }
145-
146- /// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
147- pub fn add_child (
148- & mut self ,
149- base_offset : crate :: data:: Offset ,
150- offset : crate :: data:: Offset ,
151- data : T ,
152- ) -> Result < ( ) , Error > {
153- self . assert_is_incrementing_and_update_next_offset ( offset) ?;
154-
155- let next_child_index = self . child_items . len ( ) ;
156- // SAFETY INVARIANT upheld:
157- // - This is one of two methods that modifies `children` and future_child_offsets. Out
158- // of the two, it is the only one that produces new indices in the system.
159- // - This always pushes next_child_index to *either* `children` or `future_child_offsets`,
160- // maintaining the cross-field invariant there.
161- // - This method will always push to child_items (at the end), incrementing
162- // future values of next_child_index. This means next_child_index is always
163- // unique for this method call.
164- // - As the only method producing new indices, this is the only time
165- // next_child_index will be added to children/future_child_offsets, upholding the invariant.
166- // - Since next_child_index will always be a valid index by the end of this method,
167- // this always produces valid in-bounds indices, upholding the bounds invariant.
168-
169- if let Ok ( i) = self . child_items . binary_search_by_key ( & base_offset, |i| i. offset ) {
170- self . child_items [ i] . children . push ( next_child_index as u32 ) ;
171- } else if let Ok ( i) = self . root_items . binary_search_by_key ( & base_offset, |i| i. offset ) {
172- self . root_items [ i] . children . push ( next_child_index as u32 ) ;
173- } else {
174- self . future_child_offsets . push ( ( base_offset, next_child_index) ) ;
175- }
176-
177- self . last_seen = NodeKind :: Child . into ( ) ;
178- self . child_items . push ( Item {
179- offset,
180- next_offset : 0 ,
181- data,
182- // SAFETY INVARIANT upheld: there are no children
183- children : Default :: default ( ) ,
184- } ) ;
185- Ok ( ( ) )
186- }
187- }
24+ pub use tree:: { Item , Tree } ;
18825
18926#[ cfg( test) ]
19027mod tests {
191- mod tree {
192- mod from_offsets_in_pack {
193- use std:: sync:: atomic:: AtomicBool ;
194-
195- use crate as pack;
196-
197- const SMALL_PACK_INDEX : & str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx" ;
198- const SMALL_PACK : & str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack" ;
199-
200- const INDEX_V1 : & str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx" ;
201- const PACK_FOR_INDEX_V1 : & str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack" ;
202-
203- use gix_testtools:: fixture_path;
204-
205- #[ test]
206- fn v1 ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
207- tree ( INDEX_V1 , PACK_FOR_INDEX_V1 )
208- }
209-
210- #[ test]
211- fn v2 ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
212- tree ( SMALL_PACK_INDEX , SMALL_PACK )
213- }
214-
215- fn tree ( index_path : & str , pack_path : & str ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
216- let idx = pack:: index:: File :: at ( fixture_path ( index_path) , gix_hash:: Kind :: Sha1 ) ?;
217- crate :: cache:: delta:: Tree :: from_offsets_in_pack (
218- & fixture_path ( pack_path) ,
219- idx. sorted_offsets ( ) . into_iter ( ) ,
220- & |ofs| * ofs,
221- & |id| idx. lookup ( id) . map ( |index| idx. pack_offset_at_index ( index) ) ,
222- & mut gix_features:: progress:: Discard ,
223- & AtomicBool :: new ( false ) ,
224- gix_hash:: Kind :: Sha1 ,
225- ) ?;
226- Ok ( ( ) )
227- }
228- }
229- }
23028
23129 #[ test]
23230 fn size_of_pack_tree_item ( ) {
0 commit comments