Skip to content

Commit f7e1efe

Browse files
[LLVM][ISel][SVE] Add patterns for merging reverse subtracts. (#101488)
vselect cond, ([f]sub b, a), a ==> [f]subr cond, a, b
1 parent 3c87f66 commit f7e1efe

File tree

3 files changed

+39
-30
lines changed

3 files changed

+39
-30
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,10 @@ def AArch64fsub_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
279279
(int_aarch64_sve_fsub node:$pg, node:$op1, node:$op2),
280280
(vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2), node:$op1)
281281
]>;
282+
def AArch64fsubr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
283+
(int_aarch64_sve_fsubr node:$pg, node:$op1, node:$op2),
284+
(vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op2, node:$op1), node:$op1)
285+
]>;
282286

283287
def AArch64shadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
284288
[(int_aarch64_sve_shadd node:$pg, node:$op1, node:$op2),
@@ -423,6 +427,11 @@ def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
423427

424428
def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
425429
(sub node:$op2, node:$op1)>;
430+
431+
def AArch64subr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2),
432+
[(int_aarch64_sve_subr node:$pg, node:$op1, node:$op2),
433+
(vselect node:$pg, (sub node:$op2, node:$op1), node:$op1)]>;
434+
426435
def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
427436
[(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3),
428437
(vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
@@ -529,7 +538,7 @@ let Predicates = [HasSVEorSME] in {
529538

530539
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", AArch64add_m1, DestructiveBinaryComm>;
531540
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", AArch64sub_m1, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
532-
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
541+
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", AArch64subr_m1, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
533542

534543
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", AArch64orr_m1, DestructiveBinaryComm>;
535544
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", AArch64eor_m1, DestructiveBinaryComm>;
@@ -685,7 +694,7 @@ let Predicates = [HasSVEorSME] in {
685694
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>;
686695
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
687696
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
688-
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
697+
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", AArch64fsubr_m1, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
689698
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", AArch64fmaxnm_m1, DestructiveBinaryComm>;
690699
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", AArch64fminnm_m1, DestructiveBinaryComm>;
691700
defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", AArch64fmax_m1, DestructiveBinaryComm>;

llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1505,9 +1505,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
15051505
; CHECK-LABEL: sub_nxv2i64_y:
15061506
; CHECK: // %bb.0: // %entry
15071507
; CHECK-NEXT: ptrue p0.d
1508-
; CHECK-NEXT: sub z0.d, z0.d, z1.d
15091508
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
1510-
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
1509+
; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
1510+
; CHECK-NEXT: mov z0.d, z1.d
15111511
; CHECK-NEXT: ret
15121512
entry:
15131513
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1520,9 +1520,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
15201520
; CHECK-LABEL: sub_nxv4i32_y:
15211521
; CHECK: // %bb.0: // %entry
15221522
; CHECK-NEXT: ptrue p0.s
1523-
; CHECK-NEXT: sub z0.s, z0.s, z1.s
15241523
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
1525-
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
1524+
; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
1525+
; CHECK-NEXT: mov z0.d, z1.d
15261526
; CHECK-NEXT: ret
15271527
entry:
15281528
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1535,9 +1535,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
15351535
; CHECK-LABEL: sub_nxv8i16_y:
15361536
; CHECK: // %bb.0: // %entry
15371537
; CHECK-NEXT: ptrue p0.h
1538-
; CHECK-NEXT: sub z0.h, z0.h, z1.h
15391538
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
1540-
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
1539+
; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
1540+
; CHECK-NEXT: mov z0.d, z1.d
15411541
; CHECK-NEXT: ret
15421542
entry:
15431543
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1550,9 +1550,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
15501550
; CHECK-LABEL: sub_nxv16i8_y:
15511551
; CHECK: // %bb.0: // %entry
15521552
; CHECK-NEXT: ptrue p0.b
1553-
; CHECK-NEXT: sub z0.b, z0.b, z1.b
15541553
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
1555-
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
1554+
; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
1555+
; CHECK-NEXT: mov z0.d, z1.d
15561556
; CHECK-NEXT: ret
15571557
entry:
15581558
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -2517,10 +2517,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
25172517
; CHECK-LABEL: fsub_nxv4f32_y:
25182518
; CHECK: // %bb.0: // %entry
25192519
; CHECK-NEXT: ptrue p0.s
2520-
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
25212520
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
25222521
; CHECK-NEXT: not p0.b, p0/z, p1.b
2523-
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
2522+
; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
2523+
; CHECK-NEXT: mov z0.d, z1.d
25242524
; CHECK-NEXT: ret
25252525
entry:
25262526
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2533,10 +2533,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
25332533
; CHECK-LABEL: fsub_nxv8f16_y:
25342534
; CHECK: // %bb.0: // %entry
25352535
; CHECK-NEXT: ptrue p0.h
2536-
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
25372536
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
25382537
; CHECK-NEXT: not p0.b, p0/z, p1.b
2539-
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
2538+
; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
2539+
; CHECK-NEXT: mov z0.d, z1.d
25402540
; CHECK-NEXT: ret
25412541
entry:
25422542
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2549,10 +2549,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
25492549
; CHECK-LABEL: fsub_nxv2f64_y:
25502550
; CHECK: // %bb.0: // %entry
25512551
; CHECK-NEXT: ptrue p0.d
2552-
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
25532552
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
25542553
; CHECK-NEXT: not p0.b, p0/z, p1.b
2555-
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
2554+
; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
2555+
; CHECK-NEXT: mov z0.d, z1.d
25562556
; CHECK-NEXT: ret
25572557
entry:
25582558
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer

llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -932,9 +932,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
932932
; CHECK-LABEL: sub_nxv2i64_y:
933933
; CHECK: // %bb.0: // %entry
934934
; CHECK-NEXT: ptrue p0.d
935-
; CHECK-NEXT: sub z0.d, z0.d, z1.d
936935
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
937-
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
936+
; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
937+
; CHECK-NEXT: mov z0.d, z1.d
938938
; CHECK-NEXT: ret
939939
entry:
940940
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -947,9 +947,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
947947
; CHECK-LABEL: sub_nxv4i32_y:
948948
; CHECK: // %bb.0: // %entry
949949
; CHECK-NEXT: ptrue p0.s
950-
; CHECK-NEXT: sub z0.s, z0.s, z1.s
951950
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
952-
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
951+
; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
952+
; CHECK-NEXT: mov z0.d, z1.d
953953
; CHECK-NEXT: ret
954954
entry:
955955
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -962,9 +962,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
962962
; CHECK-LABEL: sub_nxv8i16_y:
963963
; CHECK: // %bb.0: // %entry
964964
; CHECK-NEXT: ptrue p0.h
965-
; CHECK-NEXT: sub z0.h, z0.h, z1.h
966965
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
967-
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
966+
; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
967+
; CHECK-NEXT: mov z0.d, z1.d
968968
; CHECK-NEXT: ret
969969
entry:
970970
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -977,9 +977,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
977977
; CHECK-LABEL: sub_nxv16i8_y:
978978
; CHECK: // %bb.0: // %entry
979979
; CHECK-NEXT: ptrue p0.b
980-
; CHECK-NEXT: sub z0.b, z0.b, z1.b
981980
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
982-
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
981+
; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
982+
; CHECK-NEXT: mov z0.d, z1.d
983983
; CHECK-NEXT: ret
984984
entry:
985985
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1588,10 +1588,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
15881588
; CHECK-LABEL: fsub_nxv4f32_y:
15891589
; CHECK: // %bb.0: // %entry
15901590
; CHECK-NEXT: ptrue p0.s
1591-
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
15921591
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
15931592
; CHECK-NEXT: not p0.b, p0/z, p1.b
1594-
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
1593+
; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
1594+
; CHECK-NEXT: mov z0.d, z1.d
15951595
; CHECK-NEXT: ret
15961596
entry:
15971597
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1604,10 +1604,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
16041604
; CHECK-LABEL: fsub_nxv8f16_y:
16051605
; CHECK: // %bb.0: // %entry
16061606
; CHECK-NEXT: ptrue p0.h
1607-
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
16081607
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
16091608
; CHECK-NEXT: not p0.b, p0/z, p1.b
1610-
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
1609+
; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
1610+
; CHECK-NEXT: mov z0.d, z1.d
16111611
; CHECK-NEXT: ret
16121612
entry:
16131613
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1620,10 +1620,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
16201620
; CHECK-LABEL: fsub_nxv2f64_y:
16211621
; CHECK: // %bb.0: // %entry
16221622
; CHECK-NEXT: ptrue p0.d
1623-
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
16241623
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
16251624
; CHECK-NEXT: not p0.b, p0/z, p1.b
1626-
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
1625+
; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
1626+
; CHECK-NEXT: mov z0.d, z1.d
16271627
; CHECK-NEXT: ret
16281628
entry:
16291629
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer

0 commit comments

Comments
 (0)