From 7f127389362cb696f07bc7c3183ea01554767848 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 23 Jun 2018 13:05:15 +0200 Subject: [PATCH 01/11] Fixes has_cpuid implementation https://github.com/rust-lang/rust/issues/51691 --- coresimd/x86/cpuid.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index 7e000625ce..af9f88fce0 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -96,7 +96,7 @@ pub fn has_cpuid() -> bool { let eflags: u32 = __readeflags(); // Invert the ID bit in EFLAGS: - let eflags_mod: u32 = eflags | 0x0020_0000; + let eflags_mod: u32 = eflags ^ 0x0020_0000; // Store the modified EFLAGS (ID bit may or may not be inverted) __writeeflags(eflags_mod); @@ -138,6 +138,11 @@ mod tests { assert!(cpuid::has_cpuid()); } + #[test] + fn test_has_cpuid_idempotent() { + assert_eq!(cpuid::has_cpuid(), cpuid::has_cpuid()); + } + #[cfg(target_arch = "x86")] #[test] fn test_has_cpuid() { From a374f60e86d1a358d0a45b4d0ff9489a8b456283 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 23 Jun 2018 14:06:03 +0200 Subject: [PATCH 02/11] use inline assembly instead of the __{read,write}eflags intrinsics --- coresimd/x86/cpuid.rs | 49 ++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index af9f88fce0..fc96030bb3 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -86,26 +86,37 @@ pub fn has_cpuid() -> bool { } #[cfg(target_arch = "x86")] { - use coresimd::x86::{__readeflags, __writeeflags}; - - // On `x86` the `cpuid` instruction is not always available. - // This follows the approach indicated in: - // http://wiki.osdev.org/CPUID#Checking_CPUID_availability unsafe { - // Read EFLAGS: - let eflags: u32 = __readeflags(); - - // Invert the ID bit in EFLAGS: - let eflags_mod: u32 = eflags ^ 0x0020_0000; - - // Store the modified EFLAGS (ID bit may or may not be inverted) - __writeeflags(eflags_mod); - - // Read EFLAGS again: - let eflags_after: u32 = __readeflags(); - - // Check if the ID bit changed: - eflags_after != eflags + // On `x86` the `cpuid` instruction is not always available. + // This follows the approach indicated in: + // http://wiki.osdev.org/CPUID#Checking_CPUID_availability + // https://software.intel.com/en-us/articles/using-cpuid-to-detect-the-presence-of-sse-41-and-sse-42-instruction-sets/ + // which detects whether `cpuid` is available by checking whether the 21th bit of the EFLAGS register is modifiable or not. + // If it is, then `cpuid` is available. + let eax: i32; + asm!(r#" + push %ecx + pushfd + pushfd + pop %eax + mov %ecx, %eax + xor %eax, 0x200000 + push %eax + popfd + pushfd + pop %eax + xor %eax, %ecx + shrd %eax, 21 + popfd + pop %ecx + "# + : "={eax}"(eax) // output operands + : // input operands + : "memory", "ecx" // clobbers all memory + : "volatile" // has side-effects + ); + debug_assert!(eax == 0 || eax == 1); + eax == 1 } } } From f87c949b445b77cbfe9e96775b7b328c256caf8e Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 23 Jun 2018 14:40:44 +0200 Subject: [PATCH 03/11] fix has_cpuid_test --- coresimd/x86/cpuid.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index fc96030bb3..b977946ca7 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -152,19 +152,10 @@ mod tests { #[test] fn test_has_cpuid_idempotent() { assert_eq!(cpuid::has_cpuid(), cpuid::has_cpuid()); - } - - #[cfg(target_arch = "x86")] - #[test] - fn test_has_cpuid() { - unsafe { - let before = __readeflags(); - if cpuid::has_cpuid() { - assert!(before != __readeflags()); - } else { - assert!(before == __readeflags()); - } - } + let before = __readeflags(); + let _ = cpuid::has_cpuid(); + let after = __readeflags(); + assert_eq!(before, after); } } From 5760ad3fb1b767c99e1ec6dcb45de0926df03eed Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 23 Jun 2018 15:03:30 +0200 Subject: [PATCH 04/11] fix test and add comments --- coresimd/x86/cpuid.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index b977946ca7..4a70be4e92 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -91,22 +91,36 @@ pub fn has_cpuid() -> bool { // This follows the approach indicated in: // http://wiki.osdev.org/CPUID#Checking_CPUID_availability // https://software.intel.com/en-us/articles/using-cpuid-to-detect-the-presence-of-sse-41-and-sse-42-instruction-sets/ - // which detects whether `cpuid` is available by checking whether the 21th bit of the EFLAGS register is modifiable or not. + // which detects whether `cpuid` is available by checking whether the 21st bit of the EFLAGS register is modifiable or not. // If it is, then `cpuid` is available. let eax: i32; asm!(r#" + # Save ecx (__fastcall needs it preserved) and save a + # copy of the original eflags that we will restore later: push %ecx pushfd + # Copy eflags to ecx and eax: pushfd pop %eax mov %ecx, %eax + # Flip 21st bit and write back to eflags register: xor %eax, 0x200000 push %eax popfd + # Read eflags register again: pushfd pop %eax + # If cpuid is available, the bit will still be flipped + # and it will be the only bit modified. + # + # xor with the original eflags should produce a 1 in + # the 21st bit in this case, and zeros for all other bits: xor %eax, %ecx + # So if the value of the 21st bit is 1, cpuid is available, + # and if it is zero, it isn't because we didn't manage to + # modify it: shrd %eax, 21 + # Restore original eflags and ecx: popfd pop %ecx "# @@ -152,10 +166,5 @@ mod tests { #[test] fn test_has_cpuid_idempotent() { assert_eq!(cpuid::has_cpuid(), cpuid::has_cpuid()); - - let before = __readeflags(); - let _ = cpuid::has_cpuid(); - let after = __readeflags(); - assert_eq!(before, after); } } From b12aef60a1ee046d282ede8829047feb96581ace Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 23 Jun 2018 15:18:17 +0200 Subject: [PATCH 05/11] no need to save/restore ecx in the inline assembly because its already clobbered --- coresimd/x86/cpuid.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index 4a70be4e92..31f943a300 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -95,9 +95,7 @@ pub fn has_cpuid() -> bool { // If it is, then `cpuid` is available. let eax: i32; asm!(r#" - # Save ecx (__fastcall needs it preserved) and save a - # copy of the original eflags that we will restore later: - push %ecx + # Save a copy of the original eflags that we will restore later: pushfd # Copy eflags to ecx and eax: pushfd @@ -120,13 +118,12 @@ pub fn has_cpuid() -> bool { # and if it is zero, it isn't because we didn't manage to # modify it: shrd %eax, 21 - # Restore original eflags and ecx: + # Restore original eflags popfd - pop %ecx "# : "={eax}"(eax) // output operands : // input operands - : "memory", "ecx" // clobbers all memory + : "memory", "ecx" // clobbers all memory and ecx : "volatile" // has side-effects ); debug_assert!(eax == 0 || eax == 1); From 277a49a84b998ee50886c2368ae7eefb025f5d64 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sat, 23 Jun 2018 17:05:04 +0200 Subject: [PATCH 06/11] fix the shift --- coresimd/x86/cpuid.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index 31f943a300..7a8c2f273c 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -78,7 +78,7 @@ pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { } /// Does the host support the `cpuid` instruction? -#[inline] +#[inline(never)] pub fn has_cpuid() -> bool { #[cfg(target_arch = "x86_64")] { @@ -117,7 +117,7 @@ pub fn has_cpuid() -> bool { # So if the value of the 21st bit is 1, cpuid is available, # and if it is zero, it isn't because we didn't manage to # modify it: - shrd %eax, 21 + shrl %eax, 21 # Restore original eflags popfd "# From 3605ad3dc66fe7fa473dfee9bb9a3a16e0568565 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 24 Jun 2018 11:07:16 +0200 Subject: [PATCH 07/11] Use Stephen Checkoway solution --- coresimd/x86/cpuid.rs | 67 +++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index 7a8c2f273c..b708542c39 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -93,41 +93,38 @@ pub fn has_cpuid() -> bool { // https://software.intel.com/en-us/articles/using-cpuid-to-detect-the-presence-of-sse-41-and-sse-42-instruction-sets/ // which detects whether `cpuid` is available by checking whether the 21st bit of the EFLAGS register is modifiable or not. // If it is, then `cpuid` is available. - let eax: i32; - asm!(r#" - # Save a copy of the original eflags that we will restore later: - pushfd - # Copy eflags to ecx and eax: - pushfd - pop %eax - mov %ecx, %eax - # Flip 21st bit and write back to eflags register: - xor %eax, 0x200000 - push %eax - popfd - # Read eflags register again: - pushfd - pop %eax - # If cpuid is available, the bit will still be flipped - # and it will be the only bit modified. - # - # xor with the original eflags should produce a 1 in - # the 21st bit in this case, and zeros for all other bits: - xor %eax, %ecx - # So if the value of the 21st bit is 1, cpuid is available, - # and if it is zero, it isn't because we didn't manage to - # modify it: - shrl %eax, 21 - # Restore original eflags - popfd - "# - : "={eax}"(eax) // output operands - : // input operands - : "memory", "ecx" // clobbers all memory and ecx - : "volatile" // has side-effects - ); - debug_assert!(eax == 0 || eax == 1); - eax == 1 + let result: u32; + let _temp: u32; + unsafe { + asm!(r#" + # Read eflags into $0 and copy into $1: + pushfd + pop $0 + mov $1, $0 + # Flip 21st bit: + xor $0, 0x200000 + # Set eflags: + push $0 + popfd + # Read eflags again, if cpuid is available + # the 21st bit will be flipped, otherwise it + # it will have the same value as the original in $1: + pushfd + pop $0 + # Xor'ing with the original eflags should have the + # 21st bit set to true if cpuid is available and zero + # otherwise. All other bits have not been modified and + # are zero: + xor $0, $1 + # Store in $0 the value of the 21st bit + shr $0, 21 + "# + : "=r"(result), "=r"(_temp) + : + : "cc", "memory" + : "intel"); + } + result != 0 } } } From c70093bdd409991dfd90b63e86999d15f177fa46 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 24 Jun 2018 11:57:20 +0200 Subject: [PATCH 08/11] remove unnecessary shift --- coresimd/x86/cpuid.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index b708542c39..72d68e71f4 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -78,7 +78,7 @@ pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { } /// Does the host support the `cpuid` instruction? -#[inline(never)] +#[inline] pub fn has_cpuid() -> bool { #[cfg(target_arch = "x86_64")] { @@ -116,14 +116,15 @@ pub fn has_cpuid() -> bool { # otherwise. All other bits have not been modified and # are zero: xor $0, $1 - # Store in $0 the value of the 21st bit - shr $0, 21 "# : "=r"(result), "=r"(_temp) : : "cc", "memory" : "intel"); } + // Therefore, if result is 0, the bit was not modified and cpuid is + // not available. If cpuid is available, the bit was modified and + // result != 0. result != 0 } } From 6ee5955027042efef8273cecf9cb36f014338969 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 24 Jun 2018 16:21:45 +0200 Subject: [PATCH 09/11] remove unnecesary unsafe block --- coresimd/x86/cpuid.rs | 52 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index 72d68e71f4..f367294c6d 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -95,33 +95,31 @@ pub fn has_cpuid() -> bool { // If it is, then `cpuid` is available. let result: u32; let _temp: u32; - unsafe { - asm!(r#" - # Read eflags into $0 and copy into $1: - pushfd - pop $0 - mov $1, $0 - # Flip 21st bit: - xor $0, 0x200000 - # Set eflags: - push $0 - popfd - # Read eflags again, if cpuid is available - # the 21st bit will be flipped, otherwise it - # it will have the same value as the original in $1: - pushfd - pop $0 - # Xor'ing with the original eflags should have the - # 21st bit set to true if cpuid is available and zero - # otherwise. All other bits have not been modified and - # are zero: - xor $0, $1 - "# - : "=r"(result), "=r"(_temp) - : - : "cc", "memory" - : "intel"); - } + asm!(r#" + # Read eflags into $0 and copy into $1: + pushfd + pop $0 + mov $1, $0 + # Flip 21st bit: + xor $0, 0x200000 + # Set eflags: + push $0 + popfd + # Read eflags again, if cpuid is available + # the 21st bit will be flipped, otherwise it + # it will have the same value as the original in $1: + pushfd + pop $0 + # Xor'ing with the original eflags should have the + # 21st bit set to true if cpuid is available and zero + # otherwise. All other bits have not been modified and + # are zero: + xor $0, $1 + "# + : "=r"(result), "=r"(_temp) + : + : "cc", "memory" + : "intel"); // Therefore, if result is 0, the bit was not modified and cpuid is // not available. If cpuid is available, the bit was modified and // result != 0. From 9f9427ef7ff671606290398627800e787545bd26 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 24 Jun 2018 19:20:53 +0200 Subject: [PATCH 10/11] use and to test the 21st bit --- coresimd/x86/cpuid.rs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index f367294c6d..b36a09e1de 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -96,31 +96,36 @@ pub fn has_cpuid() -> bool { let result: u32; let _temp: u32; asm!(r#" - # Read eflags into $0 and copy into $1: + # Read eflags into $0 and copy it into $1: pushfd pop $0 mov $1, $0 - # Flip 21st bit: + # Flip 21st bit of $0. xor $0, 0x200000 - # Set eflags: + # Set eflags to the value of $0 + # + # Bit 21st can only be modified if cpuid is available push $0 - popfd - # Read eflags again, if cpuid is available - # the 21st bit will be flipped, otherwise it - # it will have the same value as the original in $1: - pushfd + popfd # A + # Read eflags into $0: + pushfd # B pop $0 - # Xor'ing with the original eflags should have the - # 21st bit set to true if cpuid is available and zero - # otherwise. All other bits have not been modified and - # are zero: + # xor with the original eflags sets the bits that + # have been modified: xor $0, $1 + # There is a race between popfd (A) and pushfd (B) + # where other bits beyond 21st may have been modified due to + # interrupts, a debugger stepping through the asm, etc. + # + # Therefore, explicitly check whether the 21st bit + # was modified or not: + and $0, 0x200000 "# : "=r"(result), "=r"(_temp) : : "cc", "memory" : "intel"); - // Therefore, if result is 0, the bit was not modified and cpuid is + // If result == 0, the 21st bit was not modified and cpuid is // not available. If cpuid is available, the bit was modified and // result != 0. result != 0 From 1831237174fd044acab23175b7c1795cca821cd5 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 24 Jun 2018 22:03:15 +0200 Subject: [PATCH 11/11] hoist the and out of the inline assembly --- coresimd/x86/cpuid.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/coresimd/x86/cpuid.rs b/coresimd/x86/cpuid.rs index b36a09e1de..3b2fed6339 100644 --- a/coresimd/x86/cpuid.rs +++ b/coresimd/x86/cpuid.rs @@ -113,22 +113,23 @@ pub fn has_cpuid() -> bool { # xor with the original eflags sets the bits that # have been modified: xor $0, $1 - # There is a race between popfd (A) and pushfd (B) - # where other bits beyond 21st may have been modified due to - # interrupts, a debugger stepping through the asm, etc. - # - # Therefore, explicitly check whether the 21st bit - # was modified or not: - and $0, 0x200000 "# : "=r"(result), "=r"(_temp) : : "cc", "memory" : "intel"); - // If result == 0, the 21st bit was not modified and cpuid is - // not available. If cpuid is available, the bit was modified and - // result != 0. - result != 0 + // There is a race between popfd (A) and pushfd (B) + // where other bits beyond 21st may have been modified due to + // interrupts, a debugger stepping through the asm, etc. + // + // Therefore, explicitly check whether the 21st bit + // was modified or not. + // + // If the result is zero, the cpuid bit was not modified. + // If the result is 0x200000 (non-zero), then the cpuid + // was correctly modified and the CPU supports the cpuid + // instruction: + (result & 0x200000) != 0 } } }