Skip to content

Commit 636dca6

Browse files
committed
Calling xgetbv is safe only if the OS has set osxsave.
Closes #500.
1 parent df29b6f commit 636dca6

File tree

1 file changed

+57
-54
lines changed

1 file changed

+57
-54
lines changed

stdsimd/arch/detect/os/x86.rs

+57 -54
Original file line number | Diff line number | Diff line change
@@ -150,64 +150,67 @@ fn detect_features() -> cache::Initializer {
150150
// [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
151151
let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
152152

153-
// 2. The OS must have signaled the CPU that it supports saving and
154-
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
155-
// `XCR0.AVX[2]` to `1`.
156-
//
157-
// This is safe because the CPU supports `xsave`
158-
let xcr0 = unsafe { _xgetbv(0) };
159-
let os_avx_support = xcr0 & 6 == 6;
160-
let os_avx512_support = xcr0 & 224 == 224;
161-
162-
// Only if the OS and the CPU support saving/restoring the AVX
163-
// registers we enable `xsave` support:
164-
if cpu_osxsave && os_avx_support {
165-
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
166-
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
167-
// Developer’s Manual, Volume 1: Basic Architecture":
168-
//
169-
// "Software enables the XSAVE feature set by setting
170-
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
171-
// instruction). If this bit is 0, execution of any of XGETBV,
172-
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
173-
// causes an invalid-opcode exception (#UD)"
153+
if cpu_osxsave {
154+
// 2. The OS must have signaled the CPU that it supports saving and
155+
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
156+
// `XCR0.AVX[2]` to `1`.
174157
//
175-
enable(proc_info_ecx, 26, Feature::xsave);
158+
// This is safe because the CPU supports `xsave`
159+
// and the OS has set `osxsave`.
160+
let xcr0 = unsafe { _xgetbv(0) };
161+
let os_avx_support = xcr0 & 6 == 6;
162+
let os_avx512_support = xcr0 & 224 == 224;
176163

177-
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
178-
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
179-
// ECX = 1):
180-
if max_basic_leaf >= 0xd {
181-
let CpuidResult {
182-
eax: proc_extended_state1_eax,
183-
..
184-
} = unsafe { __cpuid_count(0xd_u32, 1) };
185-
enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
186-
enable(proc_extended_state1_eax, 1, Feature::xsavec);
187-
enable(proc_extended_state1_eax, 3, Feature::xsaves);
188-
}
164+
// Only if the OS and the CPU support saving/restoring the AVX
165+
// registers we enable `xsave` support:
166+
if os_avx_support {
167+
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
168+
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
169+
// Developer’s Manual, Volume 1: Basic Architecture":
170+
//
171+
// "Software enables the XSAVE feature set by setting
172+
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
173+
// instruction). If this bit is 0, execution of any of XGETBV,
174+
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
175+
// causes an invalid-opcode exception (#UD)"
176+
//
177+
enable(proc_info_ecx, 26, Feature::xsave);
178+
179+
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
180+
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
181+
// ECX = 1):
182+
if max_basic_leaf >= 0xd {
183+
let CpuidResult {
184+
eax: proc_extended_state1_eax,
185+
..
186+
} = unsafe { __cpuid_count(0xd_u32, 1) };
187+
enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
188+
enable(proc_extended_state1_eax, 1, Feature::xsavec);
189+
enable(proc_extended_state1_eax, 3, Feature::xsaves);
190+
}
189191

190-
// And AVX/AVX2:
191-
enable(proc_info_ecx, 28, Feature::avx);
192-
enable(extended_features_ebx, 5, Feature::avx2);
192+
// And AVX/AVX2:
193+
enable(proc_info_ecx, 28, Feature::avx);
194+
enable(extended_features_ebx, 5, Feature::avx2);
193195

194-
// For AVX-512 the OS also needs to support saving/restoring
195-
// the extended state, only then we enable AVX-512 support:
196-
if os_avx512_support {
197-
enable(extended_features_ebx, 16, Feature::avx512f);
198-
enable(extended_features_ebx, 17, Feature::avx512dq);
199-
enable(extended_features_ebx, 21, Feature::avx512_ifma);
200-
enable(extended_features_ebx, 26, Feature::avx512pf);
201-
enable(extended_features_ebx, 27, Feature::avx512er);
202-
enable(extended_features_ebx, 28, Feature::avx512cd);
203-
enable(extended_features_ebx, 30, Feature::avx512bw);
204-
enable(extended_features_ebx, 31, Feature::avx512vl);
205-
enable(extended_features_ecx, 1, Feature::avx512_vbmi);
206-
enable(
207-
extended_features_ecx,
208-
14,
209-
Feature::avx512_vpopcntdq,
210-
);
196+
// For AVX-512 the OS also needs to support saving/restoring
197+
// the extended state, only then we enable AVX-512 support:
198+
if os_avx512_support {
199+
enable(extended_features_ebx, 16, Feature::avx512f);
200+
enable(extended_features_ebx, 17, Feature::avx512dq);
201+
enable(extended_features_ebx, 21, Feature::avx512_ifma);
202+
enable(extended_features_ebx, 26, Feature::avx512pf);
203+
enable(extended_features_ebx, 27, Feature::avx512er);
204+
enable(extended_features_ebx, 28, Feature::avx512cd);
205+
enable(extended_features_ebx, 30, Feature::avx512bw);
206+
enable(extended_features_ebx, 31, Feature::avx512vl);
207+
enable(extended_features_ecx, 1, Feature::avx512_vbmi);
208+
enable(
209+
extended_features_ecx,
210+
14,
211+
Feature::avx512_vpopcntdq,
212+
);
213+
}
211214
}
212215
}
213216
}

0 commit comments

Comments
 (0)