@@ -43,7 +43,6 @@ impl Default for Initializer {
4343// the one fitting our cache.
4444impl Initializer {
4545 /// Tests the `bit` of the cache.
46- #[ allow( dead_code) ]
4746 #[ inline]
4847 pub ( crate ) fn test ( self , bit : u32 ) -> bool {
4948 debug_assert ! (
@@ -80,60 +79,65 @@ impl Initializer {
8079// Note: on x64, we only use the first slot
8180static CACHE : [ Cache ; 2 ] = [ Cache :: uninitialized ( ) , Cache :: uninitialized ( ) ] ;
8281
83- /// Feature cache with capacity for `usize::MAX - 1` features.
82+ /// Feature cache with capacity for `size_of::< usize::MAX>() * 8 - 1` features.
8483///
85- /// Note: the last feature bit is used to represent an
86- /// uninitialized cache.
84+ /// Note: 0 is used to represent an uninitialized cache, and (at least) the most
85+ /// significant bit is set on any cache which has been initialized .
8786///
88- /// Note: we can use `Relaxed` atomic operations, because we are only interested
89- /// in the effects of operations on a single memory location. That is, we only
90- /// need "modification order", and not the full-blown "happens before". However,
91- /// we use `SeqCst` just to be on the safe side.
87+ /// Note: we use `Relaxed` atomic operations, because we are only interested in
88+ /// the effects of operations on a single memory location. That is, we only need
89+ /// "modification order", and not the full-blown "happens before".
9290struct Cache ( AtomicUsize ) ;
9391
9492impl Cache {
9593 const CAPACITY : u32 = ( core:: mem:: size_of :: < usize > ( ) * 8 - 1 ) as u32 ;
9694 const MASK : usize = ( 1 << Cache :: CAPACITY ) - 1 ;
95+ const INITIALIZED_BIT : usize = 1usize << Cache :: CAPACITY ;
9796
9897 /// Creates an uninitialized cache.
9998 #[ allow( clippy:: declare_interior_mutable_const) ]
10099 const fn uninitialized ( ) -> Self {
101- Cache ( AtomicUsize :: new ( usize:: MAX ) )
102- }
103- /// Is the cache uninitialized?
104- #[ inline]
105- pub ( crate ) fn is_uninitialized ( & self ) -> bool {
106- self . 0 . load ( Ordering :: SeqCst ) == usize:: MAX
100+ Cache ( AtomicUsize :: new ( 0 ) )
107101 }
108102
109- /// Is the `bit` in the cache set?
103+ /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized.
110104 #[ inline]
111- pub ( crate ) fn test ( & self , bit : u32 ) -> bool {
112- test_bit ( self . 0 . load ( Ordering :: SeqCst ) as u64 , bit)
105+ pub ( crate ) fn test ( & self , bit : u32 ) -> Option < bool > {
106+ let cached = self . 0 . load ( Ordering :: Relaxed ) ;
107+ if cached == 0 {
108+ None
109+ } else {
110+ Some ( test_bit ( cached as u64 , bit) )
111+ }
113112 }
114113
115114 /// Initializes the cache.
116115 #[ inline]
117- fn initialize ( & self , value : usize ) {
118- self . 0 . store ( value, Ordering :: SeqCst ) ;
116+ fn initialize ( & self , value : usize ) -> usize {
117+ debug_assert_eq ! ( ( value & !Cache :: MASK ) , 0 ) ;
118+ self . 0
119+ . store ( value | Cache :: INITIALIZED_BIT , Ordering :: Relaxed ) ;
120+ value
119121 }
120122}
121123
122124cfg_if:: cfg_if! {
123125 if #[ cfg( feature = "std_detect_env_override" ) ] {
124- #[ inline( never ) ]
125- fn initialize( mut value: Initializer ) {
126+ #[ inline]
127+ fn initialize( mut value: Initializer ) -> Initializer {
126128 if let Ok ( disable) = crate :: env:: var( "RUST_STD_DETECT_UNSTABLE" ) {
127129 for v in disable. split( " " ) {
128130 let _ = super :: Feature :: from_str( v) . map( |v| value. unset( v as u32 ) ) ;
129131 }
130132 }
131133 do_initialize( value) ;
134+ value
132135 }
133136 } else {
134137 #[ inline]
135- fn initialize( value: Initializer ) {
138+ fn initialize( value: Initializer ) -> Initializer {
136139 do_initialize( value) ;
140+ value
137141 }
138142 }
139143}
@@ -144,8 +148,22 @@ fn do_initialize(value: Initializer) {
144148 CACHE [ 1 ] . initialize ( ( value. 0 >> Cache :: CAPACITY ) as usize & Cache :: MASK ) ;
145149}
146150
151+ // We only have to detect features once, and it's fairly costly, so hint to LLVM
152+ // that it should assume that cache hits are more common than misses (which is
153+ // the point of caching). It's possibly unfortunate that this function needs to
154+ // reach across modules like this to call `os::detect_features`, but it produces
155+ // the best code out of several attempted variants.
156+ //
157+ // The `Initializer` that the cache was initialized with is returned, so that
158+ // the caller can call `test()` on it without having to load the value from the
159+ // cache again.
160+ #[ cold]
161+ fn detect_and_initialize ( ) -> Initializer {
162+ initialize ( super :: os:: detect_features ( ) )
163+ }
164+
147165/// Tests the `bit` of the storage. If the storage has not been initialized,
148- /// initializes it with the result of `f ()`.
166+ /// initializes it with the result of `os::detect_features ()`.
149167///
150168/// On its first invocation, it detects the CPU features and caches them in the
151169/// `CACHE` global variable as an `AtomicU64`.
@@ -157,18 +175,13 @@ fn do_initialize(value: Initializer) {
157175/// variable `RUST_STD_DETECT_UNSTABLE` and uses its its content to disable
158176/// Features that would had been otherwise detected.
159177#[ inline]
160- pub ( crate ) fn test < F > ( bit : u32 , f : F ) -> bool
161- where
162- F : FnOnce ( ) -> Initializer ,
163- {
164- let ( bit, idx) = if bit < Cache :: CAPACITY {
178+ pub ( crate ) fn test ( bit : u32 ) -> bool {
179+ let ( relative_bit, idx) = if bit < Cache :: CAPACITY {
165180 ( bit, 0 )
166181 } else {
167182 ( bit - Cache :: CAPACITY , 1 )
168183 } ;
169-
170- if CACHE [ idx] . is_uninitialized ( ) {
171- initialize ( f ( ) )
172- }
173- CACHE [ idx] . test ( bit)
184+ CACHE [ idx]
185+ . test ( relative_bit)
186+ . unwrap_or_else ( || detect_and_initialize ( ) . test ( bit) )
174187}
0 commit comments