Skip to content

[LLVM][ISel][SVE] Add patterns for merging reverse subtracts. #101488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ def AArch64fsub_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
(int_aarch64_sve_fsub node:$pg, node:$op1, node:$op2),
(vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2), node:$op1)
]>;
// Merging floating-point reverse subtract. Matches either:
//   * the int_aarch64_sve_fsubr intrinsic on ($pg, $op1, $op2), or
//   * a vselect on $pg that picks an all-active AArch64fsub_p computed with the
//     operands swapped ($op2, $op1) and otherwise yields $op1.
def AArch64fsubr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
(int_aarch64_sve_fsubr node:$pg, node:$op1, node:$op2),
// Operand order is ($op2, $op1) here, while the vselect fallback stays $op1.
(vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op2, node:$op1), node:$op1)
]>;

def AArch64shadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
[(int_aarch64_sve_shadd node:$pg, node:$op1, node:$op2),
Expand Down Expand Up @@ -423,6 +427,11 @@ def AArch64bic : PatFrags<(ops node:$op1, node:$op2),

// Reverse subtract fragment: matches a plain `sub` whose operands are swapped
// relative to the fragment's own operand list, i.e. (sub $op2, $op1).
def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
(sub node:$op2, node:$op1)>;

// Merging integer reverse subtract. Matches either:
//   * the int_aarch64_sve_subr intrinsic on ($pg, $op1, $op2), or
//   * a vselect on $pg that picks (sub $op2, $op1) and otherwise yields $op1.
def AArch64subr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2),
[(int_aarch64_sve_subr node:$pg, node:$op1, node:$op2),
// The subtraction uses swapped operands; the vselect fallback is $op1.
(vselect node:$pg, (sub node:$op2, node:$op1), node:$op1)]>;

// Merging multiply-add. Matches either:
//   * the int_aarch64_sve_mla intrinsic on ($pred, $op1, $op2, $op3), or
//   * a vselect on $pred that picks $op1 + ($op2 * $op3) and otherwise yields
//     $op1, where the multiply is an all-active AArch64mul_p_oneuse.
// NOTE(review): AArch64mul_p_oneuse is defined outside this view; presumably it
// restricts the match to a multiply with a single use -- confirm.
def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3),
(vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
Expand Down Expand Up @@ -529,7 +538,7 @@ let Predicates = [HasSVEorSME] in {

defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", AArch64add_m1, DestructiveBinaryComm>;
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", AArch64sub_m1, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
// SUBR is selected through the AArch64subr_m1 fragment; it is marked as the
// reverse instruction and names SUB_ZPmZ as its counterpart.
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", AArch64subr_m1, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;

defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", AArch64orr_m1, DestructiveBinaryComm>;
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", AArch64eor_m1, DestructiveBinaryComm>;
Expand Down Expand Up @@ -685,7 +694,7 @@ let Predicates = [HasSVEorSME] in {
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>;
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
// FSUBR is selected through the AArch64fsubr_m1 fragment; it is marked as the
// reverse instruction and names FSUB_ZPmZ as its counterpart.
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", AArch64fsubr_m1, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", AArch64fmaxnm_m1, DestructiveBinaryComm>;
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", AArch64fminnm_m1, DestructiveBinaryComm>;
defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", AArch64fmax_m1, DestructiveBinaryComm>;
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1505,9 +1505,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
Expand All @@ -1520,9 +1520,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
Expand All @@ -1535,9 +1535,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
Expand All @@ -1550,9 +1550,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
Expand Down Expand Up @@ -2517,10 +2517,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
; CHECK-LABEL: fsub_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
Expand All @@ -2533,10 +2533,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
; CHECK-LABEL: fsub_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
Expand All @@ -2549,10 +2549,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
; CHECK-LABEL: fsub_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -932,9 +932,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
Expand All @@ -947,9 +947,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
Expand All @@ -962,9 +962,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
Expand All @@ -977,9 +977,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
Expand Down Expand Up @@ -1588,10 +1588,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
; CHECK-LABEL: fsub_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
Expand All @@ -1604,10 +1604,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
; CHECK-LABEL: fsub_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
Expand All @@ -1620,10 +1620,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
; CHECK-LABEL: fsub_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
Expand Down
Loading