@@ -598,6 +598,146 @@ pub unsafe fn _mm_ucomineq_ss(a: f32x4, b: f32x4) -> i32 {
598
598
ucomineq_ss ( a, b)
599
599
}
600
600
601
+ /// Convert the lowest 32 bit float in the input vector to a 32 bit integer.
602
+ ///
603
+ /// The result is rounded according to the current rounding mode. If the result
604
+ /// cannot be represented as a 32 bit integer, `0x8000_0000` (`std::i32::MIN`)
605
+ /// is returned, or an invalid operation floating point exception is raised if
606
+ /// unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
607
+ ///
608
+ /// This corresponds to the `CVTSS2SI` instruction (with 32 bit output).
609
+ #[ inline( always) ]
610
+ #[ target_feature = "+sse" ]
611
+ #[ cfg_attr( test, assert_instr( cvtss2si) ) ]
612
+ pub unsafe fn _mm_cvtss_si32 ( a : f32x4 ) -> i32 {
613
+ cvtss2si ( a) // binding for `llvm.x86.sse.cvtss2si` from the `extern` block
614
+ }
615
+
616
+ /// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html).
617
+ #[ inline( always) ]
618
+ #[ target_feature = "+sse" ]
619
+ #[ cfg_attr( test, assert_instr( cvtss2si) ) ] // same instruction as the aliased function
620
+ pub unsafe fn _mm_cvt_ss2si ( a : f32x4 ) -> i32 {
621
+ _mm_cvtss_si32 ( a) // plain forwarding call; `a` is passed through unchanged
622
+ }
623
+
624
+ /// Convert the lowest 32 bit float in the input vector to a 64 bit integer.
625
+ ///
626
+ /// The result is rounded according to the current rounding mode. If the result
627
+ /// cannot be represented as a 64 bit integer, `0x8000_0000_0000_0000`
628
+ /// (`std::i64::MIN`) is returned, or an invalid operation floating point
629
+ /// exception is raised if unmasked (see
630
+ /// [`_mm_setcsr`](fn._mm_setcsr.html)).
631
+ ///
632
+ /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
633
+ #[ inline( always) ]
634
+ #[ target_feature = "+sse" ]
635
+ #[ cfg_attr( test, assert_instr( cvtss2si) ) ]
636
+ #[ cfg( target_arch = "x86_64" ) ] // only compiled for x86_64 targets
637
+ pub unsafe fn _mm_cvtss_si64 ( a : f32x4 ) -> i64 {
638
+ cvtss2si64 ( a) // binding for `llvm.x86.sse.cvtss2si64` from the `extern` block
639
+ }
640
+
641
+ // Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
642
+ // pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
643
+ // pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
644
+
645
+ /// Convert the lowest 32 bit float in the input vector to a 32 bit integer with
646
+ /// truncation.
647
+ ///
648
+ /// The result is always truncated (rounded towards zero). If the result cannot
649
+ /// be represented as a 32 bit integer, `0x8000_0000` (`std::i32::MIN`) is
650
+ /// returned, or an invalid operation floating point exception is raised if
651
+ /// unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
652
+ ///
653
+ /// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output).
654
+ #[ inline( always) ]
655
+ #[ target_feature = "+sse" ]
656
+ #[ cfg_attr( test, assert_instr( cvttss2si) ) ]
657
+ pub unsafe fn _mm_cvttss_si32 ( a : f32x4 ) -> i32 {
658
+ cvttss2si ( a) // binding for `llvm.x86.sse.cvttss2si` from the `extern` block
659
+ }
660
+
661
+ /// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html).
662
+ #[ inline( always) ]
663
+ #[ target_feature = "+sse" ]
664
+ #[ cfg_attr( test, assert_instr( cvttss2si) ) ] // same instruction as the aliased function
665
+ pub unsafe fn _mm_cvtt_ss2si ( a : f32x4 ) -> i32 {
666
+ _mm_cvttss_si32 ( a) // plain forwarding call; `a` is passed through unchanged
667
+ }
668
+
669
+ /// Convert the lowest 32 bit float in the input vector to a 64 bit integer with
670
+ /// truncation.
671
+ ///
672
+ /// The result is always truncated (rounded towards zero). If the result cannot
673
+ /// be represented as a 64 bit integer, `0x8000_0000_0000_0000`
674
+ /// (`std::i64::MIN`) is returned, or an invalid operation floating point
675
+ /// exception is raised if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
676
+ ///
677
+ /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
678
+ #[ inline( always) ]
679
+ #[ target_feature = "+sse" ]
680
+ #[ cfg_attr( test, assert_instr( cvttss2si) ) ]
681
+ #[ cfg( target_arch = "x86_64" ) ] // only compiled for x86_64 targets
682
+ pub unsafe fn _mm_cvttss_si64 ( a : f32x4 ) -> i64 {
683
+ cvttss2si64 ( a) // binding for `llvm.x86.sse.cvttss2si64` from the `extern` block
684
+ }
685
+
686
+ // Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
687
+ // pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
688
+ // pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }
689
+
690
+ /// Extract the lowest 32 bit float from the input vector.
691
+ #[ inline( always) ]
692
+ #[ target_feature = "+sse" ]
693
+ // No `assert_instr` check here: in the Unix x86_64 calling convention this is
694
+ // a no-op, and on Windows it's just a `mov`.
695
+ pub unsafe fn _mm_cvtss_f32 ( a : f32x4 ) -> f32 {
696
+ a. extract ( 0 ) // index 0 is the lowest lane
697
+ }
698
+
699
+ /// Convert the 32 bit integer `b` to a 32 bit float. The result is the input
700
+ /// vector `a` with the lowest 32 bit float replaced by the converted integer.
701
+ ///
702
+ /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit
703
+ /// input).
704
+ #[ inline( always) ]
705
+ #[ target_feature = "+sse" ]
706
+ #[ cfg_attr( all( test, target_os = "macos" ) , assert_instr( cvtsi2ssl) ) ] // macOS tests expect the `l`-suffixed mnemonic
707
+ #[ cfg_attr( all( test, not( target_os = "macos" ) ) , assert_instr( cvtsi2ss) ) ]
708
+ pub unsafe fn _mm_cvtsi32_ss ( a : f32x4 , b : i32 ) -> f32x4 {
709
+ a. replace ( 0 , b as f32 ) // `as f32` does the conversion; only lane 0 is replaced
710
+ }
711
+
712
+ /// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html).
713
+ #[ inline( always) ]
714
+ #[ target_feature = "+sse" ]
715
+ #[ cfg_attr( all( test, target_os = "macos" ) , assert_instr( cvtsi2ssl) ) ] // same expectations as the aliased function
716
+ #[ cfg_attr( all( test, not( target_os = "macos" ) ) , assert_instr( cvtsi2ss) ) ]
717
+ pub unsafe fn _mm_cvt_si2ss ( a : f32x4 , b : i32 ) -> f32x4 {
718
+ _mm_cvtsi32_ss ( a, b) // plain forwarding call; both arguments pass through unchanged
719
+ }
720
+
721
+ /// Convert the 64 bit integer `b` to a 32 bit float. The result is the input
722
+ /// vector `a` with the lowest 32 bit float replaced by the converted integer.
723
+ ///
724
+ /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
725
+ /// input).
726
+ #[ inline( always) ]
727
+ #[ target_feature = "+sse" ]
728
+ #[ cfg_attr( all( test, target_os = "macos" ) , assert_instr( cvtsi2ssq) ) ] // macOS tests expect the `q`-suffixed mnemonic
729
+ #[ cfg_attr( all( test, not( target_os = "macos" ) ) , assert_instr( cvtsi2ss) ) ]
730
+ #[ cfg( target_arch = "x86_64" ) ] // only compiled for x86_64 targets
731
+ pub unsafe fn _mm_cvtsi64_ss ( a : f32x4 , b : i64 ) -> f32x4 {
732
+ a. replace ( 0 , b as f32 ) // `as f32` does the conversion; only lane 0 is replaced
733
+ }
734
+
735
+ // Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
736
+ // pub unsafe fn _mm_cvtpi32_ps(a: f32x4, b: i32x2) -> f32x4
737
+ // pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
738
+ // _mm_cvtpi32_ps(a, b)
739
+ // }
740
+
601
741
/// Construct a `f32x4` with the lowest element set to `a` and the rest set to
602
742
/// zero.
603
743
#[ inline( always) ]
@@ -1542,6 +1682,16 @@ extern {
1542
1682
fn ucomige_ss ( a : f32x4 , b : f32x4 ) -> i32 ;
1543
1683
#[ link_name = "llvm.x86.sse.ucomineq.ss" ]
1544
1684
fn ucomineq_ss ( a : f32x4 , b : f32x4 ) -> i32 ;
1685
+ #[ link_name = "llvm.x86.sse.cvtss2si" ]
1686
+ fn cvtss2si ( a : f32x4 ) -> i32 ;
1687
+ #[ link_name = "llvm.x86.sse.cvtss2si64" ]
1688
+ #[ cfg( target_arch = "x86_64" ) ]
1689
+ fn cvtss2si64 ( a : f32x4 ) -> i64 ;
1690
+ #[ link_name = "llvm.x86.sse.cvttss2si" ]
1691
+ fn cvttss2si ( a : f32x4 ) -> i32 ;
1692
+ #[ link_name = "llvm.x86.sse.cvttss2si64" ]
1693
+ #[ cfg( target_arch = "x86_64" ) ]
1694
+ fn cvttss2si64 ( a : f32x4 ) -> i64 ;
1545
1695
#[ link_name = "llvm.x86.sse.sfence" ]
1546
1696
fn sfence ( ) ;
1547
1697
#[ link_name = "llvm.x86.sse.stmxcsr" ]
@@ -2532,6 +2682,152 @@ mod tests {
2532
2682
}
2533
2683
}
2534
2684
2685
+ #[ simd_test = "sse" ] // table-driven check of `sse::_mm_cvtss_si32`
2686
+ unsafe fn _mm_cvtss_si32 ( ) {
2687
+ use std:: f32:: NAN ;
2688
+ use std:: i32:: MIN ; // expected result for the out-of-range and NaN inputs below
2689
+ let inputs = & [ 42.0f32 , -3.1 , 4.0e10 , 4.0e-20 , NAN , 2147483500.1 ] ; // lane 0 values to convert
2690
+ let result = & [ 42i32 , -3 , MIN , 0 , MIN , 2147483520 ] ; // expected value for the input at the same index
2691
+ for i in 0 ..inputs. len ( ) {
2692
+ let x = f32x4:: new ( inputs[ i] , 1.0 , 3.0 , 4.0 ) ; // lanes 1..3 are fixed filler values
2693
+ let e = result[ i] ;
2694
+ let r = sse:: _mm_cvtss_si32 ( x) ;
2695
+ assert_eq ! ( e, r,
2696
+ "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}" ,
2697
+ i, x, r, e) ;
2698
+ }
2699
+ }
2700
+
2701
+ #[ simd_test = "sse" ] // table-driven check of `sse::_mm_cvtss_si64`
2702
+ #[ cfg( target_arch = "x86_64" ) ]
2703
+ unsafe fn _mm_cvtss_si64 ( ) {
2704
+ use std:: f32:: NAN ;
2705
+ use std:: i64:: MIN ;
2706
+ let inputs = & [ // (lane 0 input, expected result) pairs
2707
+ ( 42.0f32 , 42i64 ) ,
2708
+ ( -31.4 , -31 ) ,
2709
+ ( -33.5 , -34 ) , // tie goes to the even integer; assumes the default rounding mode is active
2710
+ ( -34.5 , -34 ) , // tie goes to the even integer; assumes the default rounding mode is active
2711
+ ( 4.0e10 , 40_000_000_000 ) , // beyond the i32 range but fits in i64
2712
+ ( 4.0e-10 , 0 ) , // tiny magnitude converts to 0
2713
+ ( NAN , MIN ) , // NaN is expected to give i64::MIN
2714
+ ( 2147483500.1 , 2147483520 ) , // expected value differs from the literal because of f32 precision
2715
+ ( 9.223371e18 , 9223370937343148032 ) // large value that is still inside the i64 range
2716
+ ] ;
2717
+ for i in 0 ..inputs. len ( ) {
2718
+ let ( xi, e) = inputs[ i] ;
2719
+ let x = f32x4:: new ( xi, 1.0 , 3.0 , 4.0 ) ; // lanes 1..3 are fixed filler values
2720
+ let r = sse:: _mm_cvtss_si64 ( x) ;
2721
+ assert_eq ! ( e, r,
2722
+ "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}" ,
2723
+ i, x, r, e) ;
2724
+ }
2725
+ }
2726
+
2727
+ #[ simd_test = "sse" ] // table-driven check of `sse::_mm_cvttss_si32`
2728
+ unsafe fn _mm_cvttss_si32 ( ) {
2729
+ use std:: f32:: NAN ;
2730
+ use std:: i32:: MIN ;
2731
+ let inputs = & [ // (lane 0 input, expected result) pairs
2732
+ ( 42.0f32 , 42i32 ) ,
2733
+ ( -31.4 , -31 ) ,
2734
+ ( -33.5 , -33 ) , // fraction is dropped rather than rounded
2735
+ ( -34.5 , -34 ) , // fraction is dropped rather than rounded
2736
+ ( 10.999 , 10 ) , // truncates towards zero
2737
+ ( -5.99 , -5 ) , // truncates towards zero
2738
+ ( 4.0e10 , MIN ) , // does not fit in i32
2739
+ ( 4.0e-10 , 0 ) , // tiny magnitude converts to 0
2740
+ ( NAN , MIN ) , // NaN is expected to give i32::MIN
2741
+ ( 2147483500.1 , 2147483520 ) , // expected value differs from the literal because of f32 precision
2742
+ ] ;
2743
+ for i in 0 ..inputs. len ( ) {
2744
+ let ( xi, e) = inputs[ i] ;
2745
+ let x = f32x4:: new ( xi, 1.0 , 3.0 , 4.0 ) ; // lanes 1..3 are fixed filler values
2746
+ let r = sse:: _mm_cvttss_si32 ( x) ;
2747
+ assert_eq ! ( e, r,
2748
+ "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}" ,
2749
+ i, x, r, e) ;
2750
+ }
2751
+ }
2752
+
2753
+ #[ simd_test = "sse" ] // table-driven check of `sse::_mm_cvttss_si64`
2754
+ #[ cfg( target_arch = "x86_64" ) ]
2755
+ unsafe fn _mm_cvttss_si64 ( ) {
2756
+ use std:: f32:: NAN ;
2757
+ use std:: i64:: MIN ;
2758
+ let inputs = & [ // (lane 0 input, expected result) pairs
2759
+ ( 42.0f32 , 42i64 ) ,
2760
+ ( -31.4 , -31 ) ,
2761
+ ( -33.5 , -33 ) , // fraction is dropped rather than rounded
2762
+ ( -34.5 , -34 ) , // fraction is dropped rather than rounded
2763
+ ( 10.999 , 10 ) , // truncates towards zero
2764
+ ( -5.99 , -5 ) , // truncates towards zero
2765
+ ( 4.0e10 , 40_000_000_000 ) , // beyond the i32 range but fits in i64
2766
+ ( 4.0e-10 , 0 ) , // tiny magnitude converts to 0
2767
+ ( NAN , MIN ) , // NaN is expected to give i64::MIN
2768
+ ( 2147483500.1 , 2147483520 ) , // expected value differs from the literal because of f32 precision
2769
+ ( 9.223371e18 , 9223370937343148032 ) , // large value that is still inside the i64 range
2770
+ ( 9.223372e18 , MIN ) , // as an f32 this literal is 2^63, just past i64::MAX
2771
+ ] ;
2772
+ for i in 0 ..inputs. len ( ) {
2773
+ let ( xi, e) = inputs[ i] ;
2774
+ let x = f32x4:: new ( xi, 1.0 , 3.0 , 4.0 ) ; // lanes 1..3 are fixed filler values
2775
+ let r = sse:: _mm_cvttss_si64 ( x) ;
2776
+ assert_eq ! ( e, r,
2777
+ "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}" ,
2778
+ i, x, r, e) ;
2779
+ }
2780
+ }
2781
+
2782
+ #[ simd_test = "sse" ] // table-driven check of `sse::_mm_cvtsi32_ss`
2783
+ pub unsafe fn _mm_cvtsi32_ss ( ) {
2784
+ let inputs = & [ // (integer input, expected lane 0 float) pairs
2785
+ ( 4555i32 , 4555.0f32 ) ,
2786
+ ( 322223333 , 322223330.0 ) , // not exactly representable in f32; both sides round to the same float
2787
+ ( -432 , -432.0 ) ,
2788
+ ( -322223333 , -322223330.0 ) // negative counterpart of the inexact case
2789
+ ] ;
2790
+
2791
+ for i in 0 ..inputs. len ( ) {
2792
+ let ( x, f) = inputs[ i] ;
2793
+ let a = f32x4:: new ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
2794
+ let r = sse:: _mm_cvtsi32_ss ( a, x) ;
2795
+ let e = a. replace ( 0 , f) ; // expected vector: `a` with only lane 0 swapped for `f`
2796
+ assert_eq ! ( e, r,
2797
+ "TestCase #{} _mm_cvtsi32_ss({:?}, {}) = {:?}, expected: {:?}" ,
2798
+ i, a, x, r, e) ;
2799
+ }
2800
+ }
2801
+
2802
+ #[ simd_test = "sse" ] // table-driven check of `sse::_mm_cvtsi64_ss`
2803
+ #[ cfg( target_arch = "x86_64" ) ]
2804
+ pub unsafe fn _mm_cvtsi64_ss ( ) {
2805
+ let inputs = & [ // (integer input, expected lane 0 float) pairs
2806
+ ( 4555i64 , 4555.0f32 ) ,
2807
+ ( 322223333 , 322223330.0 ) , // not exactly representable in f32; both sides round to the same float
2808
+ ( -432 , -432.0 ) ,
2809
+ ( -322223333 , -322223330.0 ) , // negative counterpart of the inexact case
2810
+ ( 9223372036854775807 , 9.223372e18 ) , // i64::MAX
2811
+ ( -9223372036854775808 , -9.223372e18 ) // i64::MIN
2812
+ ] ;
2813
+
2814
+ for i in 0 ..inputs. len ( ) {
2815
+ let ( x, f) = inputs[ i] ;
2816
+ let a = f32x4:: new ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
2817
+ let r = sse:: _mm_cvtsi64_ss ( a, x) ;
2818
+ let e = a. replace ( 0 , f) ; // expected vector: `a` with only lane 0 swapped for `f`
2819
+ assert_eq ! ( e, r,
2820
+ "TestCase #{} _mm_cvtsi64_ss({:?}, {}) = {:?}, expected: {:?}" ,
2821
+ i, a, x, r, e) ;
2822
+ }
2823
+ }
2824
+
2825
+ #[ simd_test = "sse" ] // checks that `sse::_mm_cvtss_f32` returns lane 0
2826
+ pub unsafe fn _mm_cvtss_f32 ( ) {
2827
+ let a = f32x4:: new ( 312.0134 , 5.0 , 6.0 , 7.0 ) ;
2828
+ assert_eq ! ( sse:: _mm_cvtss_f32( a) , 312.0134 ) ; // lane 0 must come back unchanged
2829
+ }
2830
+
2535
2831
#[ simd_test = "sse" ]
2536
2832
unsafe fn _mm_set_ss ( ) {
2537
2833
let r = sse:: _mm_set_ss ( black_box ( 4.25 ) ) ;
0 commit comments