diff --git a/.travis.yml b/.travis.yml
index f2288a7710..06f1bce392 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,9 @@ sudo: false
 rust: nightly
 
 matrix:
+  fast_finish: true
   include:
+    - env: TARGET=i586-unknown-linux-gnu
     - env: TARGET=i686-unknown-linux-gnu
     - env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1
     - env: TARGET=arm-unknown-linux-gnueabihf
diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..2bea700920
--- /dev/null
+++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:17.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc-multilib \
+  libc6-dev \
+  file \
+  make \
+  ca-certificates
diff --git a/src/x86/avx.rs b/src/x86/avx.rs
index 60a4aeea2e..20d5aa2bbe 100644
--- a/src/x86/avx.rs
+++ b/src/x86/avx.rs
@@ -135,7 +135,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
     mem::transmute((!a) & b)
 }
 
-/// Compare packed double-precision (64-bit) floating-point elements 
+/// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -144,7 +144,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
     maxpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, 
+/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
 /// and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -153,7 +153,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
     maxps256(a, b)
 }
 
-/// Compare packed double-precision (64-bit) floating-point elements 
+/// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -162,7 +162,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
     minpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, 
+/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
 /// and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -711,21 +711,27 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_ps() -> f32x8 {
-    mem::uninitialized()
+    // "Undefined" only means callers must not rely on the contents, so any
+    // valid value satisfies the contract; zero avoids uninitialized lanes.
+    f32x8::splat(0.0)
 }
 
 /// Return vector of type `f64x4` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_pd() -> f64x4 {
-    mem::uninitialized()
+    // "Undefined" only means callers must not rely on the contents, so any
+    // valid value satisfies the contract; zero avoids uninitialized lanes.
+    f64x4::splat(0.0)
 }
 
 /// Return vector of type `i64x4` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_si256() -> i64x4 {
-    mem::uninitialized()
+    // "Undefined" only means callers must not rely on the contents, so any
+    // valid value satisfies the contract; zero avoids uninitialized lanes.
+    i64x4::splat(0)
 }
 
 /// LLVM intrinsics used in the above functions
diff --git a/src/x86/sse.rs b/src/x86/sse.rs
index e83e59e0e8..109b1a26b9 100644
--- a/src/x86/sse.rs
+++ b/src/x86/sse.rs
@@ -252,7 +252,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
 /// half of result.
 #[inline(always)]
 #[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(unpcklpd))]
+#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
+#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
 pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [0, 1, 4, 5])
 }
@@ -851,7 +852,7 @@ mod tests {
         let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
 
         sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_ON);
-        let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+        let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
         sse::_mm_setcsr(saved_csr);
 
@@ -869,7 +870,7 @@ mod tests {
         let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
 
         sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_OFF);
-        let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+        let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
         sse::_mm_setcsr(saved_csr);
 
@@ -886,7 +887,7 @@ mod tests {
 
         assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0);  // just to be sure
 
-        let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+        let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
         let exp = f32x4::new(1.1e-41, 0.0, 0.0, 1.0);
         assert_eq!(r, exp);
diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index e7c5f366ac..a88d514a7f 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -891,7 +891,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
-    let mut dst = mem::uninitialized();
+    let mut dst = __m128i::splat(mem::uninitialized());
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut __m128i as *mut u8,
diff --git a/src/x86/sse42.rs b/src/x86/sse42.rs
index 9145398bd7..afd284824c 100644
--- a/src/x86/sse42.rs
+++ b/src/x86/sse42.rs
@@ -638,6 +638,7 @@ mod tests {
     // a bit difficult. Rather than `load` and mutate the __m128i,
     // it is easier to memcpy the given string to a local slice with
     // length 16 and `load` the local slice.
+    #[target_feature = "+sse4.2"]
     unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
         assert!(s.len() <= 16);
         let slice = &mut [0u8; 16];