@@ -162,80 +162,102 @@ fn inv_test_bit(v: usize, idx: u32) -> bool {
162
162
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
163
163
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
164
164
fn detect_features ( ) -> usize {
165
- let ebx ;
166
- let ecx ;
167
- let edx ;
165
+ let extended_features_ebx ;
166
+ let proc_info_ecx ;
167
+ let proc_info_edx ;
168
168
169
169
unsafe {
170
170
/// To obtain all feature flags we need two CPUID queries:
171
171
172
172
/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
173
173
/// This gives us most of the CPU features in ECX and EDX (see
174
- /// below),
174
+ /// below).
175
175
asm ! ( "cpuid"
176
- : "={ecx}" ( ecx ) , "={edx}" ( edx )
176
+ : "={ecx}" ( proc_info_ecx ) , "={edx}" ( proc_info_edx )
177
177
: "{eax}" ( 0x00000001u32 ) , "{ecx}" ( 0 as u32 )
178
178
: : ) ;
179
179
180
180
/// 2. EAX=7, ECX=0: Queries "Extended Features"
181
181
/// This gives us information about bmi,bmi2, and avx2 support
182
- /// (see below).
182
+ /// (see below); the result in ECX is not currently needed .
183
183
asm ! ( "cpuid"
184
- : "={ebx}" ( ebx )
184
+ : "={ebx}" ( extended_features_ebx )
185
185
: "{eax}" ( 0x00000007u32 ) , "{ecx}" ( 0 as u32 )
186
186
: : ) ;
187
187
}
188
188
189
189
let mut value: usize = 0 ;
190
190
191
- // CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX
192
- // (the result in ECX is not currently needed):
193
- if inv_test_bit ( ebx, 3 ) {
191
+ if inv_test_bit ( extended_features_ebx, 3 ) {
194
192
value = set_bit ( value, __Feature:: bmi as u32 ) ;
195
193
}
196
- if inv_test_bit ( ebx, 5 ) {
197
- value = set_bit ( value, __Feature:: avx2 as u32 ) ;
198
- }
199
- if inv_test_bit ( ebx, 8 ) {
194
+ if inv_test_bit ( extended_features_ebx, 8 ) {
200
195
value = set_bit ( value, __Feature:: bmi2 as u32 ) ;
201
196
}
202
197
203
- // CPUID call with EAX=1 => feature bits in ECX and EDX:
204
- if inv_test_bit ( ecx, 0 ) {
198
+ if inv_test_bit ( proc_info_ecx, 0 ) {
205
199
value = set_bit ( value, __Feature:: sse3 as u32 ) ;
206
200
}
207
- if inv_test_bit ( ecx , 5 ) {
201
+ if inv_test_bit ( proc_info_ecx , 5 ) {
208
202
value = set_bit ( value, __Feature:: abm as u32 ) ;
209
203
}
210
- if inv_test_bit ( ecx , 9 ) {
204
+ if inv_test_bit ( proc_info_ecx , 9 ) {
211
205
value = set_bit ( value, __Feature:: ssse3 as u32 ) ;
212
206
}
213
- if inv_test_bit ( ecx , 12 ) {
207
+ if inv_test_bit ( proc_info_ecx , 12 ) {
214
208
value = set_bit ( value, __Feature:: fma as u32 ) ;
215
209
}
216
- if inv_test_bit ( ecx , 19 ) {
210
+ if inv_test_bit ( proc_info_ecx , 19 ) {
217
211
value = set_bit ( value, __Feature:: sse4_1 as u32 ) ;
218
212
}
219
- if inv_test_bit ( ecx , 20 ) {
213
+ if inv_test_bit ( proc_info_ecx , 20 ) {
220
214
value = set_bit ( value, __Feature:: sse4_2 as u32 ) ;
221
215
}
222
- if inv_test_bit ( ecx , 21 ) {
216
+ if inv_test_bit ( proc_info_ecx , 21 ) {
223
217
value = set_bit ( value, __Feature:: tbm as u32 ) ;
224
218
}
225
- if inv_test_bit ( ecx , 23 ) {
219
+ if inv_test_bit ( proc_info_ecx , 23 ) {
226
220
value = set_bit ( value, __Feature:: popcnt as u32 ) ;
227
221
}
228
- if inv_test_bit ( ecx, 28 ) {
229
- value = set_bit ( value, __Feature:: avx as u32 ) ;
230
- }
231
222
232
- if inv_test_bit ( edx , 25 ) {
223
+ if inv_test_bit ( proc_info_edx , 25 ) {
233
224
value = set_bit ( value, __Feature:: sse as u32 ) ;
234
225
}
235
- if inv_test_bit ( edx , 26 ) {
226
+ if inv_test_bit ( proc_info_edx , 26 ) {
236
227
value = set_bit ( value, __Feature:: sse2 as u32 ) ;
237
228
}
238
229
230
+ // ECX[26] detects XSAVE and ECX[27] detects OSXSAVE, that is, whether the
231
+ // OS is AVX enabled and supports saving the state of the AVX/AVX2 vector
232
+ // registers on context-switches, see:
233
+ //
234
+ // - https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
235
+ // - https://hg.mozilla.
236
+ // org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
237
+ //
238
+ if inv_test_bit ( proc_info_ecx, 26 ) && inv_test_bit ( proc_info_ecx, 27 ) {
239
+ unsafe fn xgetbv ( xcr_no : u32 ) -> u64 {
240
+ let eax: u32 ;
241
+ let edx: u32 ;
242
+ // xgetbv
243
+ asm ! ( "xgetbv"
244
+ : "={eax}" ( eax) , "={edx}" ( edx)
245
+ : "{ecx}" ( xcr_no)
246
+ : : ) ;
247
+ ( ( edx as u64 ) << 32 ) | ( eax as u64 )
248
+ }
249
+
250
+ // This is safe because on x86 `xgetbv` is always available.
251
+ if unsafe { xgetbv ( 0 ) } & 6 == 6 {
252
+ if inv_test_bit ( proc_info_ecx, 28 ) {
253
+ value = set_bit ( value, __Feature:: avx as u32 ) ;
254
+ }
255
+ if inv_test_bit ( extended_features_ebx, 5 ) {
256
+ value = set_bit ( value, __Feature:: avx2 as u32 ) ;
257
+ }
258
+ }
259
+ }
260
+
239
261
value
240
262
}
241
263
0 commit comments