@@ -2638,55 +2638,109 @@ FORCE_INLINE uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b) {
2638
2638
return __riscv_vmerge_vvm_u32m1(vdupq_n_u32(0x0), vdupq_n_u32(UINT32_MAX), cmp_res, 4);
2639
2639
}
2640
2640
2641
- // FORCE_INLINE uint64x1_t vcgt_s64(int64x1_t a, int64x1_t b);
2641
+ FORCE_INLINE uint64x1_t vcgt_s64(int64x1_t a, int64x1_t b) {
2642
+ vbool64_t cmp_res = __riscv_vmsgt_vv_i64m1_b64(a, b, 1);
2643
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 1);
2644
+ }
2642
2645
2643
- // FORCE_INLINE uint64x2_t vcgtq_s64(int64x2_t a, int64x2_t b);
2646
+ FORCE_INLINE uint64x2_t vcgtq_s64(int64x2_t a, int64x2_t b) {
2647
+ vbool64_t cmp_res = __riscv_vmsgt_vv_i64m1_b64(a, b, 2);
2648
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 2);
2649
+ }
2644
2650
2645
- // FORCE_INLINE uint64x1_t vcgt_u64(uint64x1_t a, uint64x1_t b);
2651
+ FORCE_INLINE uint64x1_t vcgt_u64(uint64x1_t a, uint64x1_t b) {
2652
+ vbool64_t cmp_res = __riscv_vmsgeu_vv_u64m1_b64(a, b, 1);
2653
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 1);
2654
+ }
2646
2655
2647
- // FORCE_INLINE uint64x2_t vcgtq_u64(uint64x2_t a, uint64x2_t b);
2656
+ FORCE_INLINE uint64x2_t vcgtq_u64(uint64x2_t a, uint64x2_t b) {
2657
+ vbool64_t cmp_res = __riscv_vmsgeu_vv_u64m1_b64(a, b, 2);
2658
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 2);
2659
+ }
2648
2660
2649
- // FORCE_INLINE uint64x1_t vcgt_f64(float64x1_t a, float64x1_t b);
2661
+ FORCE_INLINE uint64x1_t vcgt_f64(float64x1_t a, float64x1_t b) {
2662
+ vbool64_t cmp_res = __riscv_vmfgt_vv_f64m1_b64(a, b, 1);
2663
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 1);
2664
+ }
2650
2665
2651
- // FORCE_INLINE uint64x2_t vcgtq_f64(float64x2_t a, float64x2_t b);
2666
+ FORCE_INLINE uint64x2_t vcgtq_f64(float64x2_t a, float64x2_t b) {
2667
+ vbool64_t cmp_res = __riscv_vmfgt_vv_f64m1_b64(a, b, 2);
2668
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 2);
2669
+ }
2652
2670
2653
- // FORCE_INLINE uint64_t vcgtd_s64(int64_t a, int64_t b);
2671
+ FORCE_INLINE uint64_t vcgtd_s64(int64_t a, int64_t b) { return a > b ? UINT64_MAX : 0x0; }
2654
2672
2655
- // FORCE_INLINE uint64_t vcgtd_u64(uint64_t a, uint64_t b);
2673
+ FORCE_INLINE uint64_t vcgtd_u64(uint64_t a, uint64_t b) { return a > b ? UINT64_MAX : 0x0; }
2656
2674
2657
- // FORCE_INLINE uint32_t vcgts_f32(float32_t a, float32_t b);
2675
+ FORCE_INLINE uint32_t vcgts_f32(float32_t a, float32_t b) { return a > b ? UINT32_MAX : 0x0; }
2658
2676
2659
- // FORCE_INLINE uint64_t vcgtd_f64(float64_t a, float64_t b);
2677
+ FORCE_INLINE uint64_t vcgtd_f64(float64_t a, float64_t b) { return a > b ? UINT64_MAX : 0x0; }
2660
2678
2661
- // FORCE_INLINE uint8x8_t vcgtz_s8(int8x8_t a);
2679
+ FORCE_INLINE uint8x8_t vcgtz_s8(int8x8_t a) {
2680
+ vbool8_t cmp_res = __riscv_vmsgt_vx_i8m1_b8(a, 0, 8);
2681
+ return __riscv_vmerge_vvm_u8m1(vdup_n_u8(0x0), vdup_n_u8(UINT8_MAX), cmp_res, 8);
2682
+ }
2662
2683
2663
- // FORCE_INLINE uint8x16_t vcgtzq_s8(int8x16_t a);
2684
+ FORCE_INLINE uint8x16_t vcgtzq_s8(int8x16_t a) {
2685
+ vbool8_t cmp_res = __riscv_vmsgt_vx_i8m1_b8(a, 0, 16);
2686
+ return __riscv_vmerge_vvm_u8m1(vdupq_n_u8(0x0), vdupq_n_u8(UINT8_MAX), cmp_res, 16);
2687
+ }
2664
2688
2665
- // FORCE_INLINE uint16x4_t vcgtz_s16(int16x4_t a);
2689
+ FORCE_INLINE uint16x4_t vcgtz_s16(int16x4_t a) {
2690
+ vbool16_t cmp_res = __riscv_vmsgt_vx_i16m1_b16(a, 0, 4);
2691
+ return __riscv_vmerge_vvm_u16m1(vdup_n_u16(0x0), vdup_n_u16(UINT16_MAX), cmp_res, 4);
2692
+ }
2666
2693
2667
- // FORCE_INLINE uint16x8_t vcgtzq_s16(int16x8_t a);
2694
+ FORCE_INLINE uint16x8_t vcgtzq_s16(int16x8_t a) {
2695
+ vbool16_t cmp_res = __riscv_vmsgt_vx_i16m1_b16(a, 0, 8);
2696
+ return __riscv_vmerge_vvm_u16m1(vdupq_n_u16(0x0), vdupq_n_u16(UINT16_MAX), cmp_res, 8);
2697
+ }
2668
2698
2669
- // FORCE_INLINE uint32x2_t vcgtz_s32(int32x2_t a);
2699
+ FORCE_INLINE uint32x2_t vcgtz_s32(int32x2_t a) {
2700
+ vbool32_t cmp_res = __riscv_vmsgt_vx_i32m1_b32(a, 0, 2);
2701
+ return __riscv_vmerge_vvm_u32m1(vdup_n_u32(0x0), vdup_n_u32(UINT32_MAX), cmp_res, 2);
2702
+ }
2670
2703
2671
- // FORCE_INLINE uint32x4_t vcgtzq_s32(int32x4_t a);
2704
+ FORCE_INLINE uint32x4_t vcgtzq_s32(int32x4_t a) {
2705
+ vbool32_t cmp_res = __riscv_vmsgt_vx_i32m1_b32(a, 0, 4);
2706
+ return __riscv_vmerge_vvm_u32m1(vdupq_n_u32(0x0), vdupq_n_u32(UINT32_MAX), cmp_res, 4);
2707
+ }
2672
2708
2673
- // FORCE_INLINE uint64x1_t vcgtz_s64(int64x1_t a);
2709
+ FORCE_INLINE uint64x1_t vcgtz_s64(int64x1_t a) {
2710
+ vbool64_t cmp_res = __riscv_vmsgt_vx_i64m1_b64(a, 0, 1);
2711
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 1);
2712
+ }
2674
2713
2675
- // FORCE_INLINE uint64x2_t vcgtzq_s64(int64x2_t a);
2714
+ FORCE_INLINE uint64x2_t vcgtzq_s64(int64x2_t a) {
2715
+ vbool64_t cmp_res = __riscv_vmsgt_vx_i64m1_b64(a, 0, 2);
2716
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 2);
2717
+ }
2676
2718
2677
- // FORCE_INLINE uint32x2_t vcgtz_f32(float32x2_t a);
2719
+ FORCE_INLINE uint32x2_t vcgtz_f32(float32x2_t a) {
2720
+ vbool32_t cmp_res = __riscv_vmfgt_vf_f32m1_b32(a, 0, 2);
2721
+ return __riscv_vmerge_vvm_u32m1(vdup_n_u32(0x0), vdup_n_u32(UINT32_MAX), cmp_res, 2);
2722
+ }
2678
2723
2679
- // FORCE_INLINE uint32x4_t vcgtzq_f32(float32x4_t a);
2724
+ FORCE_INLINE uint32x4_t vcgtzq_f32(float32x4_t a) {
2725
+ vbool32_t cmp_res = __riscv_vmfgt_vf_f32m1_b32(a, 0, 4);
2726
+ return __riscv_vmerge_vvm_u32m1(vdupq_n_u32(0x0), vdupq_n_u32(UINT32_MAX), cmp_res, 4);
2727
+ }
2680
2728
2681
- // FORCE_INLINE uint64x1_t vcgtz_f64(float64x1_t a);
2729
+ FORCE_INLINE uint64x1_t vcgtz_f64(float64x1_t a) {
2730
+ vbool64_t cmp_res = __riscv_vmfgt_vf_f64m1_b64(a, 0, 1);
2731
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 1);
2732
+ }
2682
2733
2683
- // FORCE_INLINE uint64x2_t vcgtzq_f64(float64x2_t a);
2734
+ FORCE_INLINE uint64x2_t vcgtzq_f64(float64x2_t a) {
2735
+ vbool64_t cmp_res = __riscv_vmfgt_vf_f64m1_b64(a, 0, 2);
2736
+ return __riscv_vmerge_vvm_u64m1(vdupq_n_u64(0x0), vdupq_n_u64(UINT64_MAX), cmp_res, 2);
2737
+ }
2684
2738
2685
- // FORCE_INLINE uint64_t vcgtzd_s64(int64_t a);
2739
+ FORCE_INLINE uint64_t vcgtzd_s64(int64_t a) { return (a > 0) ? UINT64_MAX : 0x00; }
2686
2740
2687
- // FORCE_INLINE uint32_t vcgtzs_f32(float32_t a);
2741
+ FORCE_INLINE uint32_t vcgtzs_f32(float32_t a) { return (a > 0) ? UINT32_MAX : 0x00; }
2688
2742
2689
- // FORCE_INLINE uint64_t vcgtzd_f64(float64_t a);
2743
+ FORCE_INLINE uint64_t vcgtzd_f64(float64_t a) { return (a > 0) ? UINT64_MAX : 0x00; }
2690
2744
2691
2745
FORCE_INLINE uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b) {
2692
2746
vbool8_t cmp_res = __riscv_vmsgtu_vv_u8m1_b8(a, b, 16);
0 commit comments