Skip to content

Commit dd9d334

Browse files
committed
Add an i586 builder
The i586 targets on x86 are defined to be 32-bit and to lack sse/sse2, unlike the i686 target, which has sse2 turned on by default. I was mostly curious what would happen when turning on this target, and it turns out quite a few tests failed! Most of the failures had to do with calling functions with ABI mismatches where the callee wasn't `#[inline(always)]`. Various pieces have been updated now and we should be passing all tests. Only one instruction assertion ended up changing: `_mm_movelh_ps` generates a different instruction depending on whether sse2 is ambiently enabled (`unpcklpd`) or not (`movlhps`).
1 parent cab8a5d commit dd9d334

File tree

7 files changed

+24
-14
lines changed

7 files changed

+24
-14
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ rust: nightly
44

55
matrix:
66
include:
7+
- env: TARGET=i586-unknown-linux-gnu
78
- env: TARGET=i686-unknown-linux-gnu
89
- env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1
910
- env: TARGET=arm-unknown-linux-gnueabihf
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM ubuntu:17.04
2+
RUN apt-get update && apt-get install -y --no-install-recommends \
3+
gcc-multilib \
4+
libc6-dev \
5+
file \
6+
make \
7+
ca-certificates

ci/run.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33
set -ex
44

5-
cargo test --target $TARGET
6-
cargo test --release --target $TARGET
5+
cargo test --target $TARGET -v
6+
cargo test --release --target $TARGET -v

src/x86/avx.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
135135
mem::transmute((!a) & b)
136136
}
137137

138-
/// Compare packed double-precision (64-bit) floating-point elements
138+
/// Compare packed double-precision (64-bit) floating-point elements
139139
/// in `a` and `b`, and return packed maximum values
140140
#[inline(always)]
141141
#[target_feature = "+avx"]
@@ -144,7 +144,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
144144
maxpd256(a, b)
145145
}
146146

147-
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
147+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
148148
/// and return packed maximum values
149149
#[inline(always)]
150150
#[target_feature = "+avx"]
@@ -153,7 +153,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
153153
maxps256(a, b)
154154
}
155155

156-
/// Compare packed double-precision (64-bit) floating-point elements
156+
/// Compare packed double-precision (64-bit) floating-point elements
157157
/// in `a` and `b`, and return packed minimum values
158158
#[inline(always)]
159159
#[target_feature = "+avx"]
@@ -162,7 +162,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
162162
minpd256(a, b)
163163
}
164164

165-
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
165+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
166166
/// and return packed minimum values
167167
#[inline(always)]
168168
#[target_feature = "+avx"]
@@ -711,21 +711,21 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
711711
#[inline(always)]
712712
#[target_feature = "+avx"]
713713
pub unsafe fn _mm256_undefined_ps() -> f32x8 {
714-
mem::uninitialized()
714+
f32x8::splat(mem::uninitialized())
715715
}
716716

717717
/// Return vector of type `f64x4` with undefined elements.
718718
#[inline(always)]
719719
#[target_feature = "+avx"]
720720
pub unsafe fn _mm256_undefined_pd() -> f64x4 {
721-
mem::uninitialized()
721+
f64x4::splat(mem::uninitialized())
722722
}
723723

724724
/// Return vector of type `i64x4` with undefined elements.
725725
#[inline(always)]
726726
#[target_feature = "+avx"]
727727
pub unsafe fn _mm256_undefined_si256() -> i64x4 {
728-
mem::uninitialized()
728+
i64x4::splat(mem::uninitialized())
729729
}
730730

731731
/// LLVM intrinsics used in the above functions

src/x86/sse.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
252252
/// half of result.
253253
#[inline(always)]
254254
#[target_feature = "+sse"]
255-
#[cfg_attr(test, assert_instr(unpcklpd))]
255+
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
256+
#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
256257
pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
257258
simd_shuffle4(a, b, [0, 1, 4, 5])
258259
}
@@ -851,7 +852,7 @@ mod tests {
851852
let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
852853

853854
sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_ON);
854-
let r = sse::_mm_mul_ps(black_box(a), black_box(b));
855+
let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
855856

856857
sse::_mm_setcsr(saved_csr);
857858

@@ -869,7 +870,7 @@ mod tests {
869870
let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
870871

871872
sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_OFF);
872-
let r = sse::_mm_mul_ps(black_box(a), black_box(b));
873+
let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
873874

874875
sse::_mm_setcsr(saved_csr);
875876

@@ -886,7 +887,7 @@ mod tests {
886887

887888
assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0); // just to be sure
888889

889-
let r = sse::_mm_mul_ps(black_box(a), black_box(b));
890+
let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
890891

891892
let exp = f32x4::new(1.1e-41, 0.0, 0.0, 1.0);
892893
assert_eq!(r, exp);

src/x86/sse2.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -891,7 +891,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
891891
#[target_feature = "+sse2"]
892892
#[cfg_attr(test, assert_instr(movups))]
893893
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
894-
let mut dst = mem::uninitialized();
894+
let mut dst = __m128i::splat(mem::uninitialized());
895895
ptr::copy_nonoverlapping(
896896
mem_addr as *const u8,
897897
&mut dst as *mut __m128i as *mut u8,

src/x86/sse42.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,7 @@ mod tests {
638638
// a bit difficult. Rather than `load` and mutate the __m128i,
639639
// it is easier to memcpy the given string to a local slice with
640640
// length 16 and `load` the local slice.
641+
#[target_feature = "+sse4.2"]
641642
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
642643
assert!(s.len() <= 16);
643644
let slice = &mut [0u8; 16];

0 commit comments

Comments
 (0)