Skip to content

Commit 93ba4a5

Browse files
gnzlbg authored and alexcrichton committed
Calling xgetbv is safe only if the OS has set osxsave.
Closes #500.
1 parent a19ca1c commit 93ba4a5

File tree

1 file changed

+57
-54
lines changed

1 file changed

+57
-54
lines changed

stdsimd/arch/detect/os/x86.rs

+57 -54
Original file line number | Diff line number | Diff line change
@@ -149,64 +149,67 @@ pub fn detect_features() -> cache::Initializer {
149149
// [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
150150
let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
151151

152-
// 2. The OS must have signaled the CPU that it supports saving and
153-
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
154-
// `XCR0.AVX[2]` to `1`.
155-
//
156-
// This is safe because the CPU supports `xsave`
157-
let xcr0 = unsafe { _xgetbv(0) };
158-
let os_avx_support = xcr0 & 6 == 6;
159-
let os_avx512_support = xcr0 & 224 == 224;
160-
161-
// Only if the OS and the CPU support saving/restoring the AVX
162-
// registers we enable `xsave` support:
163-
if cpu_osxsave && os_avx_support {
164-
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
165-
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
166-
// Developer’s Manual, Volume 1: Basic Architecture":
167-
//
168-
// "Software enables the XSAVE feature set by setting
169-
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
170-
// instruction). If this bit is 0, execution of any of XGETBV,
171-
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
172-
// causes an invalid-opcode exception (#UD)"
152+
if cpu_osxsave {
153+
// 2. The OS must have signaled the CPU that it supports saving and
154+
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
155+
// `XCR0.AVX[2]` to `1`.
173156
//
174-
enable(proc_info_ecx, 26, Feature::xsave);
157+
// This is safe because the CPU supports `xsave`
158+
// and the OS has set `osxsave`.
159+
let xcr0 = unsafe { _xgetbv(0) };
160+
let os_avx_support = xcr0 & 6 == 6;
161+
let os_avx512_support = xcr0 & 224 == 224;
175162

176-
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
177-
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
178-
// ECX = 1):
179-
if max_basic_leaf >= 0xd {
180-
let CpuidResult {
181-
eax: proc_extended_state1_eax,
182-
..
183-
} = unsafe { __cpuid_count(0xd_u32, 1) };
184-
enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
185-
enable(proc_extended_state1_eax, 1, Feature::xsavec);
186-
enable(proc_extended_state1_eax, 3, Feature::xsaves);
187-
}
163+
// Only if the OS and the CPU support saving/restoring the AVX
164+
// registers we enable `xsave` support:
165+
if os_avx_support {
166+
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
167+
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
168+
// Developer’s Manual, Volume 1: Basic Architecture":
169+
//
170+
// "Software enables the XSAVE feature set by setting
171+
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
172+
// instruction). If this bit is 0, execution of any of XGETBV,
173+
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
174+
// causes an invalid-opcode exception (#UD)"
175+
//
176+
enable(proc_info_ecx, 26, Feature::xsave);
177+
178+
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
179+
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
180+
// ECX = 1):
181+
if max_basic_leaf >= 0xd {
182+
let CpuidResult {
183+
eax: proc_extended_state1_eax,
184+
..
185+
} = unsafe { __cpuid_count(0xd_u32, 1) };
186+
enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
187+
enable(proc_extended_state1_eax, 1, Feature::xsavec);
188+
enable(proc_extended_state1_eax, 3, Feature::xsaves);
189+
}
188190

189-
// And AVX/AVX2:
190-
enable(proc_info_ecx, 28, Feature::avx);
191-
enable(extended_features_ebx, 5, Feature::avx2);
191+
// And AVX/AVX2:
192+
enable(proc_info_ecx, 28, Feature::avx);
193+
enable(extended_features_ebx, 5, Feature::avx2);
192194

193-
// For AVX-512 the OS also needs to support saving/restoring
194-
// the extended state, only then we enable AVX-512 support:
195-
if os_avx512_support {
196-
enable(extended_features_ebx, 16, Feature::avx512f);
197-
enable(extended_features_ebx, 17, Feature::avx512dq);
198-
enable(extended_features_ebx, 21, Feature::avx512_ifma);
199-
enable(extended_features_ebx, 26, Feature::avx512pf);
200-
enable(extended_features_ebx, 27, Feature::avx512er);
201-
enable(extended_features_ebx, 28, Feature::avx512cd);
202-
enable(extended_features_ebx, 30, Feature::avx512bw);
203-
enable(extended_features_ebx, 31, Feature::avx512vl);
204-
enable(extended_features_ecx, 1, Feature::avx512_vbmi);
205-
enable(
206-
extended_features_ecx,
207-
14,
208-
Feature::avx512_vpopcntdq,
209-
);
195+
// For AVX-512 the OS also needs to support saving/restoring
196+
// the extended state, only then we enable AVX-512 support:
197+
if os_avx512_support {
198+
enable(extended_features_ebx, 16, Feature::avx512f);
199+
enable(extended_features_ebx, 17, Feature::avx512dq);
200+
enable(extended_features_ebx, 21, Feature::avx512_ifma);
201+
enable(extended_features_ebx, 26, Feature::avx512pf);
202+
enable(extended_features_ebx, 27, Feature::avx512er);
203+
enable(extended_features_ebx, 28, Feature::avx512cd);
204+
enable(extended_features_ebx, 30, Feature::avx512bw);
205+
enable(extended_features_ebx, 31, Feature::avx512vl);
206+
enable(extended_features_ecx, 1, Feature::avx512_vbmi);
207+
enable(
208+
extended_features_ecx,
209+
14,
210+
Feature::avx512_vpopcntdq,
211+
);
212+
}
210213
}
211214
}
212215
}

0 commit comments

Comments
 (0)