diff --git a/.travis.yml b/.travis.yml index f2288a7710..06f1bce392 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,9 @@ sudo: false rust: nightly matrix: + fast_finish: true include: + - env: TARGET=i586-unknown-linux-gnu - env: TARGET=i686-unknown-linux-gnu - env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1 - env: TARGET=arm-unknown-linux-gnueabihf diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000..2bea700920 --- /dev/null +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:17.04 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc-multilib \ + libc6-dev \ + file \ + make \ + ca-certificates diff --git a/src/x86/avx.rs b/src/x86/avx.rs index 60a4aeea2e..20d5aa2bbe 100644 --- a/src/x86/avx.rs +++ b/src/x86/avx.rs @@ -135,7 +135,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 { mem::transmute((!a) & b) } -/// Compare packed double-precision (64-bit) floating-point elements +/// Compare packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and return packed maximum values #[inline(always)] #[target_feature = "+avx"] @@ -144,7 +144,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 { maxpd256(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, +/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, /// and return packed maximum values #[inline(always)] #[target_feature = "+avx"] @@ -153,7 +153,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 { maxps256(a, b) } -/// Compare packed double-precision (64-bit) floating-point elements +/// Compare packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and return packed minimum values #[inline(always)] #[target_feature = "+avx"] @@ -162,7 +162,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 { minpd256(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, +/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, /// and return packed minimum values #[inline(always)] #[target_feature = "+avx"] @@ -711,21 +711,21 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_undefined_ps() -> f32x8 { - mem::uninitialized() + f32x8::splat(mem::uninitialized()) } /// Return vector of type `f64x4` with undefined elements. #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_undefined_pd() -> f64x4 { - mem::uninitialized() + f64x4::splat(mem::uninitialized()) } /// Return vector of type `i64x4` with undefined elements. #[inline(always)] #[target_feature = "+avx"] pub unsafe fn _mm256_undefined_si256() -> i64x4 { - mem::uninitialized() + i64x4::splat(mem::uninitialized()) } /// LLVM intrinsics used in the above functions diff --git a/src/x86/sse.rs b/src/x86/sse.rs index e83e59e0e8..109b1a26b9 100644 --- a/src/x86/sse.rs +++ b/src/x86/sse.rs @@ -252,7 +252,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 { /// half of result. #[inline(always)] #[target_feature = "+sse"] -#[cfg_attr(test, assert_instr(unpcklpd))] +#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))] +#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))] pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 { simd_shuffle4(a, b, [0, 1, 4, 5]) } @@ -851,7 +852,7 @@ mod tests { let b = f32x4::new(0.001, 0.0, 0.0, 1.0); sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_ON); - let r = sse::_mm_mul_ps(black_box(a), black_box(b)); + let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b)); sse::_mm_setcsr(saved_csr); @@ -869,7 +870,7 @@ mod tests { let b = f32x4::new(0.001, 0.0, 0.0, 1.0); sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_OFF); - let r = sse::_mm_mul_ps(black_box(a), black_box(b)); + let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b)); sse::_mm_setcsr(saved_csr); @@ -886,7 +887,7 @@ mod tests { assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0); // just to be sure - let r = sse::_mm_mul_ps(black_box(a), black_box(b)); + let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b)); let exp = f32x4::new(1.1e-41, 0.0, 0.0, 1.0); assert_eq!(r, exp); diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs index e7c5f366ac..a88d514a7f 100644 --- a/src/x86/sse2.rs +++ b/src/x86/sse2.rs @@ -891,7 +891,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movups))] pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { - let mut dst = mem::uninitialized(); + let mut dst = __m128i::splat(mem::uninitialized()); ptr::copy_nonoverlapping( mem_addr as *const u8, &mut dst as *mut __m128i as *mut u8, diff --git a/src/x86/sse42.rs b/src/x86/sse42.rs index 9145398bd7..afd284824c 100644 --- a/src/x86/sse42.rs +++ b/src/x86/sse42.rs @@ -638,6 +638,7 @@ mod tests { // a bit difficult. Rather than `load` and mutate the __m128i, // it is easier to memcpy the given string to a local slice with // length 16 and `load` the local slice. + #[target_feature = "+sse4.2"] unsafe fn str_to_m128i(s: &[u8]) -> __m128i { assert!(s.len() <= 16); let slice = &mut [0u8; 16];