Skip to content

Commit 302f01e

Browse files
eduardosm authored and Amanieu committed
Re-implement some AVX functions without LLVM intrinsics
1 parent 45f064e commit 302f01e

File tree

1 file changed

+4
-12
lines changed
  • crates/core_arch/src/x86

1 file changed

+4
-12
lines changed

crates/core_arch/src/x86/avx.rs

+4-12
Original file line number | Diff line number | Diff line change
@@ -851,7 +851,7 @@ pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
851851
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
852852
#[stable(feature = "simd_x86", since = "1.27.0")]
853853
pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
854-
vcvtdq2ps(a.as_i32x8())
854+
simd_cast(a.as_i32x8())
855855
}
856856

857857
/// Converts packed double-precision (64-bit) floating-point elements in `a`
@@ -863,7 +863,7 @@ pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
863863
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
864864
#[stable(feature = "simd_x86", since = "1.27.0")]
865865
pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
866-
vcvtpd2ps(a)
866+
simd_cast(a)
867867
}
868868

869869
/// Converts packed single-precision (32-bit) floating-point elements in `a`
@@ -1237,7 +1237,7 @@ pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
12371237
#[cfg_attr(test, assert_instr(vbroadcastf128))]
12381238
#[stable(feature = "simd_x86", since = "1.27.0")]
12391239
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
1240-
vbroadcastf128ps256(a)
1240+
simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
12411241
}
12421242

12431243
/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
@@ -1249,7 +1249,7 @@ pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
12491249
#[cfg_attr(test, assert_instr(vbroadcastf128))]
12501250
#[stable(feature = "simd_x86", since = "1.27.0")]
12511251
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
1252-
vbroadcastf128pd256(a)
1252+
simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
12531253
}
12541254

12551255
/// Copies `a` to result, then inserts 128 bits (composed of 4 packed
@@ -2948,10 +2948,6 @@ extern "C" {
29482948
fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
29492949
#[link_name = "llvm.x86.sse.cmp.ss"]
29502950
fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
2951-
#[link_name = "llvm.x86.avx.cvtdq2.ps.256"]
2952-
fn vcvtdq2ps(a: i32x8) -> __m256;
2953-
#[link_name = "llvm.x86.avx.cvt.pd2.ps.256"]
2954-
fn vcvtpd2ps(a: __m256d) -> __m128;
29552951
#[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
29562952
fn vcvtps2dq(a: __m256) -> i32x8;
29572953
#[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
@@ -2978,10 +2974,6 @@ extern "C" {
29782974
fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
29792975
#[link_name = "llvm.x86.avx.vperm2f128.si.256"]
29802976
fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
2981-
#[link_name = "llvm.x86.avx.vbroadcastf128.ps.256"]
2982-
fn vbroadcastf128ps256(a: &__m128) -> __m256;
2983-
#[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"]
2984-
fn vbroadcastf128pd256(a: &__m128d) -> __m256d;
29852977
#[link_name = "llvm.x86.avx.maskload.pd.256"]
29862978
fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
29872979
#[link_name = "llvm.x86.avx.maskstore.pd.256"]

0 commit comments

Comments
 (0)