@@ -2,13 +2,16 @@ use crate::dep_graph::DepNodeIndex;

 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded::{self, Sharded};
-use rustc_data_structures::sync::{Lock, OnceLock};
+use rustc_data_structures::sync::{AtomicUsize, OnceLock};
 use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_index::{Idx, IndexVec};
+use rustc_index::Idx;
 use rustc_span::def_id::DefId;
 use rustc_span::def_id::DefIndex;
 use std::fmt::Debug;
 use std::hash::Hash;
+use std::marker::PhantomData;
+use std::mem::offset_of;
+use std::sync::atomic::{AtomicPtr, AtomicU32, Ordering};

 pub trait QueryCache: Sized {
     type Key: Hash + Eq + Copy + Debug;
@@ -100,16 +103,173 @@ where
     }
 }

-pub struct VecCache<K: Idx, V> {
-    cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
+struct Slot<V> {
+    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
+    value: V,
+    // This is both an index and a once-lock.
+    //
+    // 0: not yet initialized.
+    // 1: lock held, initializing.
+    // 2..=u32::MAX: initialized; the stored value is the index plus 2.
+    index_and_lock: AtomicU32,
 }

 impl<K: Idx, V> Default for VecCache<K, V> {
     fn default() -> Self {
-        VecCache { cache: Default::default() }
+        VecCache {
+            buckets: Default::default(),
+            key: PhantomData,
+            len: Default::default(),
+            present: Default::default(),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+struct SlotIndex {
+    bucket_idx: usize,
+    entries: usize,
+    index_in_bucket: usize,
+}
+
+impl SlotIndex {
+    #[inline]
+    fn from_index(idx: u32) -> Self {
+        let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
+        let entries;
+        let running_sum;
+        if bucket <= 11 {
+            entries = 1 << 12;
+            running_sum = 0;
+            bucket = 0;
+        } else {
+            entries = 1 << bucket;
+            running_sum = entries;
+            bucket = bucket - 11;
+        }
+        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+    }
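In other words, indices `0..4096` all land in bucket 0 at their own offset, and every later bucket covers exactly one power of two. A standalone sketch of the same mapping (function name and `main` are illustrative, not part of the patch):

```rust
// Mirror of the bucket math in `from_index`, with spot checks at the
// bucket boundaries. Returns (bucket_idx, index_in_bucket).
fn bucket_of(idx: u32) -> (usize, usize) {
    let log = idx.checked_ilog2().unwrap_or(0) as usize;
    if log <= 11 {
        (0, idx as usize) // bucket 0 covers indices 0..2^12
    } else {
        // bucket `log - 11` starts at index 2^log and holds 2^log entries
        (log - 11, idx as usize - (1usize << log))
    }
}

fn main() {
    assert_eq!(bucket_of(0), (0, 0));
    assert_eq!(bucket_of(4095), (0, 4095)); // last slot of bucket 0
    assert_eq!(bucket_of(4096), (1, 0)); // first slot of bucket 1
    assert_eq!(bucket_of(8192), (2, 0)); // first slot of bucket 2
    assert_eq!(bucket_of(u32::MAX), (20, (1 << 31) - 1)); // last slot overall
}
```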
+
+    #[inline]
+    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = bucket.load(Ordering::Acquire);
+        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
+        if ptr.is_null() {
+            return None;
+        }
+        // SAFETY: Follows from preconditions on `buckets` and `self`.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: `slot` is in-bounds (see above), and buckets are allocated zeroed, so the
+        // `index_and_lock` field is always a valid AtomicU32 (zero means "not initialized").
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        let current = index_and_lock.load(Ordering::Acquire);
+        let index = match current {
+            0 => return None,
+            // Treat "initializing" as actually just not initialized at all.
+            // The only reason this is a separate state is that `complete` calls could race and
+            // we can't allow that, but from the load perspective there's no difference.
+            1 => return None,
+            _ => current - 2,
+        };
+
+        // SAFETY:
+        // * slot is a valid pointer (buckets are always valid for the index we get).
+        // * value is initialized since we saw a >= 2 index above.
+        // * `V: Copy`, so safe to read.
+        let value = unsafe { slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().read() };
+        Some((value, index))
+    }
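The `Acquire` loads above pair with the `Release` stores in `put` below: once a reader observes `index_and_lock >= 2`, the writer's earlier write of `value` is guaranteed to be visible. Reduced to its essentials, the publication pattern looks like this (a hypothetical standalone sketch, not patch code):

```rust
// A writer fills a payload, then releases a flag; a reader that acquires
// the flag is guaranteed to see the payload.
use std::sync::atomic::{AtomicU32, Ordering};
use std::{cell::UnsafeCell, thread};

struct Cell1 {
    payload: UnsafeCell<u64>,
    flag: AtomicU32, // 0 = empty, 2 = published (mirroring the 2..=u32::MAX states above)
}
// SAFETY (of the sketch): the payload is only read after the flag is acquired.
unsafe impl Sync for Cell1 {}

fn main() {
    let cell = Cell1 { payload: UnsafeCell::new(0), flag: AtomicU32::new(0) };
    thread::scope(|s| {
        s.spawn(|| {
            unsafe { *cell.payload.get() = 42 }; // write the value first...
            cell.flag.store(2, Ordering::Release); // ...then publish it
        });
        s.spawn(|| {
            if cell.flag.load(Ordering::Acquire) >= 2 {
                // Acquire ordering makes the payload write visible here.
                assert_eq!(unsafe { *cell.payload.get() }, 42);
            }
        });
    });
}
```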
+
+    /// Returns true if this successfully put into the map.
+    #[inline]
+    unsafe fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let mut ptr = bucket.load(Ordering::Acquire);
+        let _allocator_guard;
+        if ptr.is_null() {
+            // If we load null, then acquire the global lock; this path is quite cold, so it's
+            // cheap to use a global lock.
+            _allocator_guard = LOCK.lock();
+            // And re-load the value.
+            ptr = bucket.load(Ordering::Acquire);
+        }
+
+        // OK, now under the allocator lock: if we're still null, then it's definitely us that
+        // will initialize this bucket.
+        if ptr.is_null() {
+            let bucket_layout =
+                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            // SAFETY: Always >0 entries in each bucket.
+            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
+            if allocated.is_null() {
+                std::alloc::handle_alloc_error(bucket_layout);
+            }
+            bucket.store(allocated, Ordering::Release);
+            ptr = allocated;
+        }
+        assert!(!ptr.is_null());
+
+        // SAFETY: `index_in_bucket` is always in-bounds of the allocated array.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: `slot` is in-bounds, and the zeroed allocation makes `index_and_lock` a
+        // valid AtomicU32.
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        match index_and_lock.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed) {
+            Ok(_) => {
+                // We have acquired the initialization lock. It is our job to write `value` and
+                // then set the lock to the real index.
+
+                unsafe {
+                    slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().write(value);
+                }
+
+                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+
+                true
+            }
+
+            // Treat "initializing" as actually initialized: we lost the race and should skip
+            // any updates to this slot.
+            //
+            // FIXME: Is that OK? Some of our other implementations replace the entry instead of
+            // preserving the earliest write. We could do a parking-lot or spinlock here...
+            Err(1) => false,
+
+            // This slot was already populated. Also ignore; currently this is the same as
+            // "initializing".
+            Err(_) => false,
+        }
     }
 }
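`put` only takes the global mutex on the cold path where a bucket pointer is still null, and re-checks after acquiring it so that exactly one thread allocates. The same double-checked pattern in isolation (names hypothetical, not from the patch):

```rust
// Readers take the fast path on a non-null pointer; at most one writer
// allocates, under a global mutex, and publishes with a Release store.
use std::sync::atomic::{AtomicPtr, Ordering};
use std::sync::Mutex;

static LOCK: Mutex<()> = Mutex::new(());

fn get_or_alloc(bucket: &AtomicPtr<u64>, entries: usize) -> *mut u64 {
    let mut ptr = bucket.load(Ordering::Acquire);
    if ptr.is_null() {
        // Slow path: serialize would-be allocators, then re-check under the lock.
        let _guard = LOCK.lock().unwrap();
        ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() {
            let layout = std::alloc::Layout::array::<u64>(entries).unwrap();
            // SAFETY: entries > 0, so the layout is non-zero-sized.
            let alloc = unsafe { std::alloc::alloc_zeroed(layout).cast::<u64>() };
            if alloc.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            bucket.store(alloc, Ordering::Release); // publish the zeroed bucket
            ptr = alloc;
        }
    }
    ptr
}
```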

+pub struct VecCache<K: Idx, V> {
+    // Entries per bucket:
+    // Bucket 0:  4096 (2^12)
+    // Bucket 1:  4096 (2^12)
+    // Bucket 2:  8192 (2^13)
+    // Bucket 3: 16384 (2^14)
+    // ...
+    // Bucket 19: 1073741824 (2^30)
+    // Bucket 20: 2147483648 (2^31)
+    // The total number of entries if all buckets are initialized is 2^32, one per u32 index.
+    buckets: [AtomicPtr<Slot<V>>; 21],
+
+    // `present` and `len` are only used during incremental compilation and self-profiling.
+    // They are an optimization over iterating the full buckets array.
+    present: [AtomicPtr<Slot<()>>; 21],
+    len: AtomicUsize,
+
+    key: PhantomData<K>,
+}
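The sizes in this comment telescope: 2·2^12 + 2^13 + … + 2^31 = 2^32, so the 21 buckets cover every u32 index. A quick arithmetic check (illustrative only):

```rust
fn main() {
    // Bucket 0 holds 2^12 entries; bucket b >= 1 holds 2^(b + 11).
    let total: u64 = (0..21u64).map(|b| 1u64 << if b == 0 { 12 } else { b + 11 }).sum();
    assert_eq!(total, 1u64 << 32); // every u32 index has a slot
}
```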
+
 impl<K, V> QueryCache for VecCache<K, V>
 where
     K: Eq + Idx + Copy + Debug,
@@ -120,20 +280,41 @@ where

     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-        let lock = self.cache.lock();
-        if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        match unsafe { slot_idx.get(&self.buckets) } {
+            Some((value, idx)) => Some((value, DepNodeIndex::from_u32(idx))),
+            None => None,
+        }
     }

     #[inline]
     fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        let mut lock = self.cache.lock();
-        lock.insert(key, (value, index));
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        if unsafe { slot_idx.put(&self.buckets, value, index.index() as u32) } {
+            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
+            let slot = SlotIndex::from_index(present_idx as u32);
+            // We should always be uniquely putting due to `len` fetch_add returning unique
+            // values.
+            assert!(unsafe { slot.put(&self.present, (), key) });
+        }
     }

     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        for (k, v) in self.cache.lock().iter_enumerated() {
-            if let Some(v) = v {
-                f(&k, &v.0, v.1);
+        for idx in 0..self.len.load(Ordering::Relaxed) {
+            let key = SlotIndex::from_index(idx as u32);
+            match unsafe { key.get(&self.present) } {
+                // This shouldn't happen in our current usage (`iter` is really only used long
+                // after queries are done running), but if we hit this in practice it's probably
+                // fine to just break early.
+                None => unreachable!(),
+                Some(((), key)) => {
+                    let key = K::new(key as usize);
+                    // unwrap() is OK: present entries are always written only after we put the
+                    // real entry.
+                    let value = self.lookup(&key).unwrap();
+                    f(&key, &value.0, value.1);
+                }
             }
         }
     }
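On a successful `put`, `complete` claims a unique position in the `present` cache via `len.fetch_add`, so `present` acts as an append-only log of populated keys that `iter` can replay without scanning every bucket. The claim-then-publish idiom in isolation (hypothetical sketch, not patch code):

```rust
use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
use std::thread;

// Writers claim distinct slots with fetch_add, then publish a key there;
// a later reader replays exactly `len` entries.
struct AppendLog {
    len: AtomicUsize,
    slots: Vec<AtomicU32>, // 0 = empty; key + 1 = published (cf. index + 2 above)
}

impl AppendLog {
    fn push(&self, key: u32) {
        let idx = self.len.fetch_add(1, Ordering::Relaxed); // unique per writer
        self.slots[idx].store(key + 1, Ordering::Release);
    }
}

fn main() {
    let log = AppendLog {
        len: AtomicUsize::new(0),
        slots: (0..8).map(|_| AtomicU32::new(0)).collect(),
    };
    thread::scope(|s| {
        let log = &log;
        for k in 0..4u32 {
            s.spawn(move || log.push(k));
        }
    });
    // All four pushes landed in distinct slots.
    assert_eq!(log.len.load(Ordering::Relaxed), 4);
    let published = log.slots.iter().filter(|s| s.load(Ordering::Acquire) != 0).count();
    assert_eq!(published, 4);
}
```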
@@ -142,10 +323,7 @@ where
 pub struct DefIdCache<V> {
     /// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
     /// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
-    ///
-    /// The second element of the tuple is the set of keys actually present in the IndexVec, used
-    /// for faster iteration in `iter()`.
-    local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
+    local: VecCache<DefIndex, V>,
     foreign: DefaultCache<DefId, V>,
 }
@@ -165,8 +343,7 @@ where
     #[inline(always)]
     fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
         if key.krate == LOCAL_CRATE {
-            let cache = self.local.lock();
-            cache.0.get(key.index).and_then(|v| *v)
+            self.local.lookup(&key.index)
         } else {
             self.foreign.lookup(key)
         }
@@ -175,27 +352,43 @@ where
     #[inline]
     fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
         if key.krate == LOCAL_CRATE {
-            let mut cache = self.local.lock();
-            let (cache, present) = &mut *cache;
-            let slot = cache.ensure_contains_elem(key.index, Default::default);
-            if slot.is_none() {
-                // FIXME: Only store the present set when running in incremental mode. `iter` is not
-                // used outside of saving caches to disk and self-profile.
-                present.push(key.index);
-            }
-            *slot = Some((value, index));
+            self.local.complete(key.index, value, index)
         } else {
             self.foreign.complete(key, value, index)
         }
     }

     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        let guard = self.local.lock();
-        let (cache, present) = &*guard;
-        for &idx in present.iter() {
-            let value = cache[idx].unwrap();
-            f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
-        }
+        self.local.iter(&mut |key, value, index| {
+            f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
+        });
         self.foreign.iter(f);
     }
 }
+
+#[test]
+fn slot_index_exhaustive() {
+    let mut buckets = [0u32; 21];
+    for idx in 0..=u32::MAX {
+        buckets[SlotIndex::from_index(idx).bucket_idx] += 1;
+    }
+    let mut prev = None::<SlotIndex>;
+    for idx in 0..u32::MAX {
+        let slot_idx = SlotIndex::from_index(idx);
+        if let Some(p) = prev {
+            if p.bucket_idx == slot_idx.bucket_idx {
+                assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket);
+            } else {
+                assert_eq!(slot_idx.index_in_bucket, 0);
+            }
+        } else {
+            assert_eq!(idx, 0);
+            assert_eq!(slot_idx.index_in_bucket, 0);
+            assert_eq!(slot_idx.bucket_idx, 0);
+        }
+
+        assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32);
+
+        prev = Some(slot_idx);
+    }
+}
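A companion smoke test for the cache itself could exercise the miss/complete/hit contract. This one is a sketch, not part of the patch, and assumes it lives next to the code above so that `DefIndex::from_u32` and `DepNodeIndex::from_u32` are in scope:

```rust
#[test]
fn vec_cache_smoke() {
    // Hypothetical test: a miss, then a `complete`, then a hit returning
    // the same value and dep-node index.
    let cache: VecCache<DefIndex, u32> = VecCache::default();
    let key = DefIndex::from_u32(7);
    assert!(cache.lookup(&key).is_none());
    cache.complete(key, 99, DepNodeIndex::from_u32(3));
    assert_eq!(cache.lookup(&key), Some((99, DepNodeIndex::from_u32(3))));
}
```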