@@ -2,13 +2,16 @@ use crate::dep_graph::DepNodeIndex;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded::{self, Sharded};
-use rustc_data_structures::sync::{Lock, OnceLock};
+use rustc_data_structures::sync::{AtomicUsize, OnceLock};
 use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_index::{Idx, IndexVec};
+use rustc_index::Idx;
 use rustc_span::def_id::DefId;
 use rustc_span::def_id::DefIndex;
 use std::fmt::Debug;
 use std::hash::Hash;
+use std::marker::PhantomData;
+use std::mem::offset_of;
+use std::sync::atomic::{AtomicPtr, AtomicU32, Ordering};
 
 pub trait QueryCache: Sized {
     type Key: Hash + Eq + Copy + Debug;
@@ -100,13 +103,201 @@ where
     }
 }
 
-pub struct VecCache<K: Idx, V> {
-    cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
+struct Slot<V> {
+    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
+    value: V,
+    // This is both an index and a once-lock.
+    //
+    // 0: not yet initialized.
+    // 1: lock held, initializing.
+    // 2..u32::MAX - 2: initialized.
+    index_and_lock: AtomicU32,
 }
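
`index_and_lock` packs a once-lock and the `DepNodeIndex` into a single atomic. A minimal standalone sketch of that three-state protocol (not part of the patch; `try_init` and `read` are illustrative names):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// 0 = empty, 1 = write in progress, n >= 2 = initialized with payload n - 2.
fn try_init(state: &AtomicU32, payload: u32) -> bool {
    // Only one thread can move the slot from 0 (empty) to 1 (locked).
    if state.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed).is_ok() {
        // ... the value itself would be written here ...
        // Publish with Release so a reader that observes `payload + 2`
        // also observes the value write above.
        state.store(payload.checked_add(2).unwrap(), Ordering::Release);
        true
    } else {
        false // someone else owns or has already initialized the slot
    }
}

fn read(state: &AtomicU32) -> Option<u32> {
    match state.load(Ordering::Acquire) {
        0 | 1 => None,    // empty, or still being written: treat as absent
        n => Some(n - 2), // initialized: recover the stored payload
    }
}
```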
 
 impl<K: Idx, V> Default for VecCache<K, V> {
     fn default() -> Self {
-        VecCache { cache: Default::default() }
+        VecCache {
+            buckets: Default::default(),
+            key: PhantomData,
+            len: Default::default(),
+            present: Default::default(),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+struct SlotIndex {
+    bucket_idx: usize,
+    entries: usize,
+    index_in_bucket: usize,
+}
+
+impl SlotIndex {
+    #[inline]
+    fn from_index(idx: u32) -> Self {
+        let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
+        let entries;
+        let running_sum;
+        if bucket <= 11 {
+            entries = 1 << 12;
+            running_sum = 0;
+            bucket = 0;
+        } else {
+            entries = 1 << bucket;
+            running_sum = entries;
+            bucket = bucket - 11;
+        }
+        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+    }
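
Spot-checking the arithmetic: the first 4096 keys share bucket 0, and every later power-of-two range gets its own bucket of that size. These assertions mirror `from_index` (they assume visibility of the private `SlotIndex` fields, as the crate's own tests would have):

```rust
let spot = |idx: u32| {
    let s = SlotIndex::from_index(idx);
    (s.bucket_idx, s.entries, s.index_in_bucket)
};
assert_eq!(spot(0), (0, 4096, 0)); // ilog2 is undefined for 0; treated as bucket 0
assert_eq!(spot(4095), (0, 4096, 4095)); // last slot of bucket 0
assert_eq!(spot(4096), (1, 4096, 0)); // ilog2 = 12 starts bucket 1
assert_eq!(spot(8192), (2, 8192, 0)); // each subsequent bucket doubles in size
assert_eq!(spot(u32::MAX), (20, 1 << 31, (1 << 31) - 1)); // final slot of bucket 20
```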
+
+    #[inline]
+    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+        // in-bounds of buckets. See `from_index` for the computation.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = bucket.load(Ordering::Acquire);
+        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
+        if ptr.is_null() {
+            return None;
+        }
+        // SAFETY: Follows from preconditions on `buckets` and `self`.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: `slot` is in-bounds as established above, and the bucket was zero-allocated,
+        // so `index_and_lock` is a valid (possibly still zero) AtomicU32.
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        let current = index_and_lock.load(Ordering::Acquire);
+        let index = match current {
+            0 => return None,
+            // Treat "initializing" as actually just not initialized at all.
+            // The only reason this is a separate state is that `complete` calls could race and
+            // we can't allow that, but from the load perspective there's no difference.
+            1 => return None,
+            _ => current - 2,
+        };
+
+        // SAFETY:
+        // * slot is a valid pointer (buckets are always valid for the index we get).
+        // * value is initialized since we saw a >= 2 index above.
+        // * `V: Copy`, so safe to read.
+        let value = unsafe { slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().read() };
+        Some((value, index))
+    }
+
+    /// Returns true if this successfully put into the map.
+    #[inline]
+    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let mut ptr = bucket.load(Ordering::Acquire);
+        let _allocator_guard;
+        if ptr.is_null() {
+            // If we load null, then acquire the global lock; this path is quite cold, so it's
+            // cheap to use a global lock.
+            _allocator_guard = LOCK.lock();
+            // And re-load the value.
+            ptr = bucket.load(Ordering::Acquire);
+        }
+
+        // OK, now under the allocator lock: if we're still null, then it's definitely us that
+        // will initialize this bucket.
+        if ptr.is_null() {
+            let bucket_layout =
+                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            // SAFETY: Always >0 entries in each bucket.
+            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
+            if allocated.is_null() {
+                std::alloc::handle_alloc_error(bucket_layout);
+            }
+            bucket.store(allocated, Ordering::Release);
+            ptr = allocated;
+        }
+        assert!(!ptr.is_null());
+
+        // SAFETY: `index_in_bucket` is always in-bounds of the allocated array.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        match index_and_lock.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed) {
+            Ok(_) => {
+                // We have acquired the initialization lock. It is our job to write `value` and
+                // then set the lock to the real index.
+
+                unsafe {
+                    slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().write(value);
+                }
+
+                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+
+                true
+            }
+
+            // We raced another `put` that is mid-initialization. In practice this should be
+            // unreachable: an external lock only allows one initialization for any given query
+            // result, so we assert rather than skipping the update.
+            Err(1) => unreachable!(),
+
+            // This slot was already populated: ignore the new value and report that we did not
+            // write. Currently this is handled the same way as "initializing".
+            Err(_) => false,
+        }
+    }
+}
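
Bucket allocation in `put` is classic double-checked locking: an optimistic `Acquire` load, a global mutex only on the cold path, then a re-check under the lock. A standalone sketch of just that pattern (illustrative only, with `u64` slots standing in for `Slot<V>`):

```rust
use std::sync::Mutex;
use std::sync::atomic::{AtomicPtr, Ordering};

fn get_or_alloc(bucket: &AtomicPtr<u64>, entries: usize) -> *mut u64 {
    static LOCK: Mutex<()> = Mutex::new(());
    let mut ptr = bucket.load(Ordering::Acquire); // fast path: already allocated
    if ptr.is_null() {
        let _guard = LOCK.lock().unwrap(); // cold path: serialize allocators
        ptr = bucket.load(Ordering::Acquire); // re-check: another thread may have won
        if ptr.is_null() {
            let layout = std::alloc::Layout::array::<u64>(entries).unwrap();
            // Zeroed memory means every slot starts in state 0 ("empty").
            let allocated = unsafe { std::alloc::alloc_zeroed(layout) }.cast::<u64>();
            if allocated.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            bucket.store(allocated, Ordering::Release); // publish the allocation
            ptr = allocated;
        }
    }
    ptr
}
```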
+
+pub struct VecCache<K: Idx, V> {
+    // Entries per bucket:
+    // Bucket  0:       4096 2^12
+    // Bucket  1:       4096 2^12
+    // Bucket  2:       8192
+    // Bucket  3:      16384
+    // ...
+    // Bucket 19: 1073741824
+    // Bucket 20: 2147483648
+    // The total number of entries if all buckets are initialized is 2^32 (one slot per u32 key).
+    buckets: [AtomicPtr<Slot<V>>; 21],
+
+    // `present` and `len` are only used during incremental and self-profiling.
+    // They are an optimization over iterating the full buckets array.
+    present: [AtomicPtr<Slot<()>>; 21],
+    len: AtomicUsize,
+
+    key: PhantomData<K>,
+}
+
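The table telescopes: two 4096-entry buckets, then doubling. A quick arithmetic check of the capacity claim (not part of the patch):

```rust
fn main() {
    // Bucket 0 holds 2^12 entries; buckets 1..=20 hold 2^12 through 2^31.
    let total: u64 = (1u64 << 12) + (12..=31).map(|b| 1u64 << b).sum::<u64>();
    assert_eq!(total, 1u64 << 32); // 4_294_967_296: one slot for every u32
}
```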
+// SAFETY: No access to `V` is made.
+unsafe impl<K: Idx, #[may_dangle] V> Drop for VecCache<K, V> {
+    fn drop(&mut self) {
+        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
+        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
+        // contents).
+
+        let mut entries = 1 << 12;
+        for bucket in self.buckets.iter() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<V>>(entries).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+            entries *= 2;
+        }
+
+        let mut entries = 1 << 12;
+        for bucket in self.present.iter() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                // `present` buckets hold `Slot<()>`, so free them with the layout they were
+                // allocated with.
+                let layout = std::alloc::Layout::array::<Slot<()>>(entries).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+            entries *= 2;
+        }
     }
 }
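
The `#[may_dangle]` eyepatch is what lets this destructor run even when borrowed values stored in the cache have already expired. A hypothetical illustration (`TestIdx` stands in for any `Idx` newtype; this is not code from the patch):

```rust
fn demo() {
    let cache: VecCache<TestIdx, &str> = VecCache::default();
    let s = String::from("borrowed");
    cache.complete(TestIdx::new(0), &s, DepNodeIndex::from_u32(0));
    // Locals drop in reverse order: `s` first, then `cache`. The stored &str
    // dangles while `cache` is dropped, which dropck accepts only because the
    // eyepatch promises `drop` never reads or drops any `V`.
}
```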
 
@@ -120,20 +311,41 @@ where
 
     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-        let lock = self.cache.lock();
-        if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        match unsafe { slot_idx.get(&self.buckets) } {
+            Some((value, idx)) => Some((value, DepNodeIndex::from_u32(idx))),
+            None => None,
+        }
     }
 
     #[inline]
     fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        let mut lock = self.cache.lock();
-        lock.insert(key, (value, index));
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        if slot_idx.put(&self.buckets, value, index.index() as u32) {
+            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
+            let slot = SlotIndex::from_index(present_idx as u32);
+            // We should always be uniquely putting due to `len` fetch_add returning unique values.
+            assert!(slot.put(&self.present, (), key));
+        }
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        for (k, v) in self.cache.lock().iter_enumerated() {
-            if let Some(v) = v {
-                f(&k, &v.0, v.1);
+        for idx in 0..self.len.load(Ordering::Relaxed) {
+            let key = SlotIndex::from_index(idx as u32);
+            match unsafe { key.get(&self.present) } {
+                // This shouldn't happen in our current usage (iter is really only used long
+                // after queries are done running), but if we hit this in practice it's
+                // probably fine to just break early.
+                None => unreachable!(),
+                Some(((), key)) => {
+                    let key = K::new(key as usize);
+                    // unwrap() is OK: present entries are always written only after we put
+                    // the real entry.
+                    let value = self.lookup(&key).unwrap();
+                    f(&key, &value.0, value.1);
+                }
             }
         }
     }
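
Behaviorally the trait impl is unchanged; only the synchronization strategy moved from a `Lock` to atomics. A hypothetical single-threaded roundtrip (`TestIdx` again a stand-in; the `DepNodeIndex` value is arbitrary):

```rust
let cache: VecCache<TestIdx, u32> = VecCache::default();
assert_eq!(cache.lookup(&TestIdx::new(77)), None); // nothing completed yet
cache.complete(TestIdx::new(77), 42, DepNodeIndex::from_u32(3));
assert_eq!(cache.lookup(&TestIdx::new(77)), Some((42, DepNodeIndex::from_u32(3))));
// `iter` visits exactly the completed entries, in completion order.
cache.iter(&mut |k, v, i| println!("{k:?} => {v} ({i:?})"));
```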
@@ -142,10 +354,7 @@ where
 pub struct DefIdCache<V> {
     /// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
     /// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
-    ///
-    /// The second element of the tuple is the set of keys actually present in the IndexVec, used
-    /// for faster iteration in `iter()`.
-    local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
+    local: VecCache<DefIndex, V>,
     foreign: DefaultCache<DefId, V>,
 }
@@ -165,8 +374,7 @@ where
     #[inline(always)]
     fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
         if key.krate == LOCAL_CRATE {
-            let cache = self.local.lock();
-            cache.0.get(key.index).and_then(|v| *v)
+            self.local.lookup(&key.index)
         } else {
             self.foreign.lookup(key)
         }
@@ -175,27 +383,19 @@ where
     #[inline]
     fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
         if key.krate == LOCAL_CRATE {
-            let mut cache = self.local.lock();
-            let (cache, present) = &mut *cache;
-            let slot = cache.ensure_contains_elem(key.index, Default::default);
-            if slot.is_none() {
-                // FIXME: Only store the present set when running in incremental mode. `iter` is
-                // not used outside of saving caches to disk and self-profile.
-                present.push(key.index);
-            }
-            *slot = Some((value, index));
+            self.local.complete(key.index, value, index)
         } else {
             self.foreign.complete(key, value, index)
         }
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        let guard = self.local.lock();
-        let (cache, present) = &*guard;
-        for &idx in present.iter() {
-            let value = cache[idx].unwrap();
-            f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
-        }
+        self.local.iter(&mut |key, value, index| {
+            f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
+        });
         self.foreign.iter(f);
     }
 }
+
+#[cfg(test)]
+mod tests;
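
A sketch of the kind of test the new `tests` module makes possible (hypothetical, not the patch's actual tests; `TestIdx` stands in for an `Idx` newtype, and the key ranges are disjoint because, as `put` notes, the query system never races two `complete` calls on the same key):

```rust
#[test]
fn concurrent_disjoint_completes() {
    let cache = VecCache::<TestIdx, u32>::default();
    std::thread::scope(|s| {
        for t in 0..4u32 {
            let cache = &cache;
            s.spawn(move || {
                // Each thread owns the keys congruent to `t` mod 4.
                for i in (t..4096).step_by(4) {
                    cache.complete(TestIdx::new(i as usize), i, DepNodeIndex::from_u32(i));
                }
            });
        }
    });
    // Every write is visible afterwards, with no torn (value, index) pairs.
    for i in 0..4096u32 {
        assert_eq!(
            cache.lookup(&TestIdx::new(i as usize)),
            Some((i, DepNodeIndex::from_u32(i)))
        );
    }
}
```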