Skip to content

Commit ab6e166

Browse files
gnzlbg authored and alexcrichton committed
[runtime-detection-x86] detect avx and avx2 only if osxsave is true (rust-lang#154)
1 parent f32393d commit ab6e166

File tree

1 file changed

+50
-28
lines changed

1 file changed

+50
-28
lines changed

src/x86/runtime.rs

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -162,80 +162,102 @@ fn inv_test_bit(v: usize, idx: u32) -> bool {
162162
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
163163
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
164164
fn detect_features() -> usize {
165-
let ebx;
166-
let ecx;
167-
let edx;
165+
let extended_features_ebx;
166+
let proc_info_ecx;
167+
let proc_info_edx;
168168

169169
unsafe {
170170
/// To obtain all feature flags we need two CPUID queries:
171171
172172
/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
173173
/// This gives us most of the CPU features in ECX and EDX (see
174-
/// below),
174+
/// below).
175175
asm!("cpuid"
176-
: "={ecx}"(ecx), "={edx}"(edx)
176+
: "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx)
177177
: "{eax}"(0x00000001u32), "{ecx}"(0 as u32)
178178
: :);
179179

180180
/// 2. EAX=7, ECX=0: Queries "Extended Features"
181181
/// This gives us information about bmi, bmi2, and avx2 support
182-
/// (see below).
182+
/// (see below); the result in ECX is not currently needed.
183183
asm!("cpuid"
184-
: "={ebx}"(ebx)
184+
: "={ebx}"(extended_features_ebx)
185185
: "{eax}"(0x00000007u32), "{ecx}"(0 as u32)
186186
: :);
187187
}
188188

189189
let mut value: usize = 0;
190190

191-
// CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX
192-
// (the result in ECX is not currently needed):
193-
if inv_test_bit(ebx, 3) {
191+
if inv_test_bit(extended_features_ebx, 3) {
194192
value = set_bit(value, __Feature::bmi as u32);
195193
}
196-
if inv_test_bit(ebx, 5) {
197-
value = set_bit(value, __Feature::avx2 as u32);
198-
}
199-
if inv_test_bit(ebx, 8) {
194+
if inv_test_bit(extended_features_ebx, 8) {
200195
value = set_bit(value, __Feature::bmi2 as u32);
201196
}
202197

203-
// CPUID call with EAX=1 => feature bits in ECX and EDX:
204-
if inv_test_bit(ecx, 0) {
198+
if inv_test_bit(proc_info_ecx, 0) {
205199
value = set_bit(value, __Feature::sse3 as u32);
206200
}
207-
if inv_test_bit(ecx, 5) {
201+
if inv_test_bit(proc_info_ecx, 5) {
208202
value = set_bit(value, __Feature::abm as u32);
209203
}
210-
if inv_test_bit(ecx, 9) {
204+
if inv_test_bit(proc_info_ecx, 9) {
211205
value = set_bit(value, __Feature::ssse3 as u32);
212206
}
213-
if inv_test_bit(ecx, 12) {
207+
if inv_test_bit(proc_info_ecx, 12) {
214208
value = set_bit(value, __Feature::fma as u32);
215209
}
216-
if inv_test_bit(ecx, 19) {
210+
if inv_test_bit(proc_info_ecx, 19) {
217211
value = set_bit(value, __Feature::sse4_1 as u32);
218212
}
219-
if inv_test_bit(ecx, 20) {
213+
if inv_test_bit(proc_info_ecx, 20) {
220214
value = set_bit(value, __Feature::sse4_2 as u32);
221215
}
222-
if inv_test_bit(ecx, 21) {
216+
if inv_test_bit(proc_info_ecx, 21) {
223217
value = set_bit(value, __Feature::tbm as u32);
224218
}
225-
if inv_test_bit(ecx, 23) {
219+
if inv_test_bit(proc_info_ecx, 23) {
226220
value = set_bit(value, __Feature::popcnt as u32);
227221
}
228-
if inv_test_bit(ecx, 28) {
229-
value = set_bit(value, __Feature::avx as u32);
230-
}
231222

232-
if inv_test_bit(edx, 25) {
223+
if inv_test_bit(proc_info_edx, 25) {
233224
value = set_bit(value, __Feature::sse as u32);
234225
}
235-
if inv_test_bit(edx, 26) {
226+
if inv_test_bit(proc_info_edx, 26) {
236227
value = set_bit(value, __Feature::sse2 as u32);
237228
}
238229

230+
// ECX[26] detects XSAVE and ECX[27] detects OSXSAVE, that is, whether the
231+
// OS is AVX enabled and supports saving the state of the AVX/AVX2 vector
232+
// registers on context-switches, see:
233+
//
234+
// - https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
235+
// - https://hg.mozilla.
236+
// org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
237+
//
238+
if inv_test_bit(proc_info_ecx, 26) && inv_test_bit(proc_info_ecx, 27) {
239+
unsafe fn xgetbv(xcr_no: u32) -> u64 {
240+
let eax: u32;
241+
let edx: u32;
242+
// xgetbv
243+
asm!("xgetbv"
244+
: "={eax}"(eax), "={edx}"(edx)
245+
: "{ecx}"(xcr_no)
246+
: :);
247+
((edx as u64) << 32) | (eax as u64)
248+
}
249+
250+
// This is safe because OSXSAVE (ECX[27]) was checked above, which guarantees that `xgetbv` can be executed.
251+
if unsafe { xgetbv(0) } & 6 == 6 {
252+
if inv_test_bit(proc_info_ecx, 28) {
253+
value = set_bit(value, __Feature::avx as u32);
254+
}
255+
if inv_test_bit(extended_features_ebx, 5) {
256+
value = set_bit(value, __Feature::avx2 as u32);
257+
}
258+
}
259+
}
260+
239261
value
240262
}
241263

0 commit comments

Comments
 (0)