Add an i586 builder

alexcrichton · alexcrichton · commit dd9d334426dc · 2017-10-06T15:44:49.000-07:00
The i586 targets on x86 are defined to be 32-bit and to lack sse/sse2, unlike
the i686 target, which has sse2 turned on by default. I was mostly curious what
would happen when turning on this target, and it turns out quite a few tests
failed!

Most of the failing tests had to do with calling functions with ABI mismatches
where the callee wasn't `#[inline(always)]`. Various pieces have been updated
now and we should be passing all tests.

Only one instruction assertion ended up changing: `_mm_movelh_ps` generates a
different instruction depending on whether sse2 is ambiently enabled
(`unpcklpd`) or not (`movlhps`).
diff --git a/.travis.yml b/.travis.yml
@@ -4,6 +4,7 @@ rust: nightly
 
 matrix:
   include:
+    - env: TARGET=i586-unknown-linux-gnu
     - env: TARGET=i686-unknown-linux-gnu
     - env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1
     - env: TARGET=arm-unknown-linux-gnueabihf
diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:17.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc-multilib \
+  libc6-dev \
+  file \
+  make \
+  ca-certificates
diff --git a/ci/run.sh b/ci/run.sh
@@ -2,5 +2,5 @@
 
 set -ex
 
-cargo test --target $TARGET
-cargo test --release --target $TARGET
+cargo test --target $TARGET -v
+cargo test --release --target $TARGET -v
diff --git a/src/x86/avx.rs b/src/x86/avx.rs
@@ -135,7 +135,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
     mem::transmute((!a) & b)
 }
 
-/// Compare packed double-precision (64-bit) floating-point elements 
+/// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -144,7 +144,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
     maxpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, 
+/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
 /// and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -153,7 +153,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
     maxps256(a, b)
 }
 
-/// Compare packed double-precision (64-bit) floating-point elements 
+/// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -162,7 +162,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
     minpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, 
+/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
 /// and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -711,21 +711,21 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_ps() -> f32x8 {
-    mem::uninitialized()
+    f32x8::splat(mem::uninitialized())
 }
 
 /// Return vector of type `f64x4` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_pd() -> f64x4 {
-    mem::uninitialized()
+    f64x4::splat(mem::uninitialized())
 }
 
 /// Return vector of type `i64x4` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_si256() -> i64x4 {
-    mem::uninitialized()
+    i64x4::splat(mem::uninitialized())
 }
 
 /// LLVM intrinsics used in the above functions
diff --git a/src/x86/sse.rs b/src/x86/sse.rs
@@ -252,7 +252,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
 /// half of result.
 #[inline(always)]
 #[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(unpcklpd))]
+#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
+#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
 pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [0, 1, 4, 5])
 }
@@ -851,7 +852,7 @@ mod tests {
         let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
 
         sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_ON);
-        let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+        let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
         sse::_mm_setcsr(saved_csr);
 
@@ -869,7 +870,7 @@ mod tests {
         let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
 
         sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_OFF);
-        let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+        let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
         sse::_mm_setcsr(saved_csr);
 
@@ -886,7 +887,7 @@ mod tests {
 
         assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0);  // just to be sure
 
-        let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+        let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
         let exp = f32x4::new(1.1e-41, 0.0, 0.0, 1.0);
         assert_eq!(r, exp);
diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
@@ -891,7 +891,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
-    let mut dst = mem::uninitialized();
+    let mut dst = __m128i::splat(mem::uninitialized());
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut __m128i as *mut u8,
diff --git a/src/x86/sse42.rs b/src/x86/sse42.rs
@@ -638,6 +638,7 @@ mod tests {
     // a bit difficult. Rather than `load` and mutate the __m128i,
     // it is easier to memcpy the given string to a local slice with
     // length 16 and `load` the local slice.
+    #[target_feature = "+sse4.2"]
     unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
         assert!(s.len() <= 16);
         let slice = &mut [0u8; 16];