@@ -2302,33 +2302,35 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
 ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
 ; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: slli a3, a3, 3
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vlse64.v v16, (a5), zero
 ; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: slli a3, a3, 4
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vand.vx v24, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
+; RV32-NEXT: vand.vx v16, v8, a3, v0.t
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
 ; RV32-NEXT: csrr a5, vlenb
 ; RV32-NEXT: slli a5, a5, 4
 ; RV32-NEXT: add a5, sp, a5
 ; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v8, v16, v0.t
+; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: slli a5, a5, 3
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
 ; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
+; RV32-NEXT: slli a5, a5, 3
 ; RV32-NEXT: add a5, sp, a5
 ; RV32-NEXT: addi a5, a5, 16
 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
@@ -2342,7 +2344,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
 ; RV32-NEXT: vand.vx v24, v24, a3, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a1, a1, 4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2360,7 +2362,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
 ; RV32-NEXT: vmv.v.x v24, a1
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2687,33 +2689,35 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
 ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
 ; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: slli a3, a3, 3
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vlse64.v v16, (a5), zero
 ; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: slli a3, a3, 4
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vand.vx v24, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
+; RV32-NEXT: vand.vx v16, v8, a3, v0.t
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
 ; RV32-NEXT: csrr a5, vlenb
 ; RV32-NEXT: slli a5, a5, 4
 ; RV32-NEXT: add a5, sp, a5
 ; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v8, v16, v0.t
+; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: slli a5, a5, 3
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
 ; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
+; RV32-NEXT: slli a5, a5, 3
 ; RV32-NEXT: add a5, sp, a5
 ; RV32-NEXT: addi a5, a5, 16
 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
@@ -2727,7 +2731,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
 ; RV32-NEXT: vand.vx v24, v24, a3, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a1, a1, 4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2745,7 +2749,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
 ; RV32-NEXT: vmv.v.x v24, a1
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -3052,19 +3056,8 @@ declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <v
 define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_bitreverse_nxv64i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v7, v0
 ; CHECK-NEXT: csrr a3, vlenb
 ; CHECK-NEXT: lui a1, 1
 ; CHECK-NEXT: lui a2, 3
@@ -3080,63 +3073,48 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
 ; CHECK-NEXT: addi a2, a2, 819
 ; CHECK-NEXT: addi a1, a6, 1365
 ; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
+; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t
 ; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t
+; CHECK-NEXT: vand.vx v24, v24, a4, v0.t
 ; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
-; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
+; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsrl.vi v24, v16, 2, v0.t
+; CHECK-NEXT: vand.vx v24, v24, a2, v0.t
 ; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
-; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
+; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsrl.vi v24, v16, 1, v0.t
+; CHECK-NEXT: vand.vx v24, v24, a1, v0.t
 ; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
-; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a5, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t
+; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: bltu a0, a3, .LBB46_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a0, a3
 ; CHECK-NEXT: .LBB46_2:
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v7
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t
 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
-; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
-; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vsrl.vi v24, v8, 4, v0.t
+; CHECK-NEXT: vand.vx v24, v24, a4, v0.t
 ; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vsrl.vi v24, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v24, v24, a2, v0.t
 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
-; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
+; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vsrl.vi v24, v8, 1, v0.t
+; CHECK-NEXT: vand.vx v24, v24, a1, v0.t
 ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: