[AArch64][GlobalISel] Select arith extended add/sub in manual selection code

Jessica Paquette · Jessica Paquette · commit c42053f79b6c · 2020-11-11T09:26:03.000-08:00
The manual selection code for add/sub was not checking if it was possible to fold in shifts + extends (the *rx opcode variants). As a result, we could never select things like ``` cmp x1, w0, uxtw #2 ``` because we don't import any patterns for compares. This adds support for the arithmetic extended register forms and updates tests for instructions selected using `emitADD`, `emitADDS`, and `emitSUBS`. This is a 0.1% geomean code size improvement on SPECINT2000 at -Os. Differential Revision: https://reviews.llvm.org/D91207
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -188,7 +188,8 @@ class AArch64InstructionSelector : public InstructionSelector {
   ///    {{AArch64::ADDXri, AArch64::ADDWri},
   ///     {AArch64::ADDXrs, AArch64::ADDWrs},
   ///     {AArch64::ADDXrr, AArch64::ADDWrr},
-  ///     {AArch64::SUBXri, AArch64::SUBWri}}};
+  ///     {AArch64::SUBXri, AArch64::SUBWri},
+  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
   /// \endcode
   ///
   /// Each row in the table corresponds to a different addressing mode. Each
@@ -199,6 +200,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   ///   - Row 1: The rs opcode variants
   ///   - Row 2: The rr opcode variants
   ///   - Row 3: The ri opcode variants for negative immediates
+  ///   - Row 4: The rx opcode variants
   ///
   /// \attention Columns must be structured as follows:
   ///   - Column 0: The 64-bit opcode variants
@@ -208,7 +210,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   /// \p LHS is the left-hand operand of the binop to emit.
   /// \p RHS is the right-hand operand of the binop to emit.
   MachineInstr *emitAddSub(
-      const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
+      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
       Register Dst, MachineOperand &LHS, MachineOperand &RHS,
       MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
@@ -3821,7 +3823,7 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
 }
 
 MachineInstr *AArch64InstructionSelector::emitAddSub(
-    const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
+    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
     MachineIRBuilder &MIRBuilder) const {
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3842,6 +3844,11 @@ MachineInstr *AArch64InstructionSelector::emitAddSub(
     return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                      MIRBuilder, Fns);
 
+  // INSTRrx form.
+  if (auto Fns = selectArithExtendedRegister(RHS))
+    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+                     MIRBuilder, Fns);
+
   // INSTRrs form.
   if (auto Fns = selectShiftedRegister(RHS))
     return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
@@ -3854,35 +3861,38 @@ MachineInstr *
 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 4> OpcTable{
+  const std::array<std::array<unsigned, 2>, 5> OpcTable{
       {{AArch64::ADDXri, AArch64::ADDWri},
        {AArch64::ADDXrs, AArch64::ADDWrs},
        {AArch64::ADDXrr, AArch64::ADDWrr},
-       {AArch64::SUBXri, AArch64::SUBWri}}};
+       {AArch64::SUBXri, AArch64::SUBWri},
+       {AArch64::ADDXrx, AArch64::ADDWrx}}};
   return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
 }
 
 MachineInstr *
 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                      MachineOperand &RHS,
                                      MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 4> OpcTable{
+  const std::array<std::array<unsigned, 2>, 5> OpcTable{
       {{AArch64::ADDSXri, AArch64::ADDSWri},
        {AArch64::ADDSXrs, AArch64::ADDSWrs},
        {AArch64::ADDSXrr, AArch64::ADDSWrr},
-       {AArch64::SUBSXri, AArch64::SUBSWri}}};
+       {AArch64::SUBSXri, AArch64::SUBSWri},
+       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
 MachineInstr *
 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                      MachineOperand &RHS,
                                      MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 4> OpcTable{
+  const std::array<std::array<unsigned, 2>, 5> OpcTable{
       {{AArch64::SUBSXri, AArch64::SUBSWri},
        {AArch64::SUBSXrs, AArch64::SUBSWrs},
        {AArch64::SUBSXrr, AArch64::SUBSWrr},
-       {AArch64::ADDSXri, AArch64::ADDSWri}}};
+       {AArch64::ADDSXri, AArch64::ADDSWri},
+       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
@@ -603,3 +603,36 @@ body:             |
     %cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
     $w0 = COPY %cmp(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmn_arith_extended_shl
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $x0, $x1
+    ; We should be able to fold away the extend + shift and select ADDSXrx.
+
+    ; CHECK-LABEL: name: cmn_arith_extended_shl
+    ; CHECK: liveins: $w0, $x0, $x1
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: %reg1:gpr32 = COPY $w0
+    ; CHECK: $xzr = ADDSXrx %reg0, %reg1, 50, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s64) = COPY $x0
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %sub:gpr(s64) = G_SUB %zero, %reg0
+
+    %reg1:gpr(s32) = COPY $w0
+    %ext:gpr(s64) = G_SEXT %reg1(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %sub(s64), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -182,3 +182,91 @@ body:             |
     %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
     $w0 = COPY %cmp(s32)
     RET_ReallyLR implicit $w0
+...
+---
+name:            cmp_arith_extended_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $x1
+
+    ; CHECK-LABEL: name: cmp_arith_extended_s64
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr64sp = COPY $x1
+    ; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s64) = COPY $x1
+    %ext:gpr(s64) = G_ZEXT %reg0(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmp_arith_extended_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $h0
+
+    ; CHECK-LABEL: name: cmp_arith_extended_s32
+    ; CHECK: liveins: $w0, $w1, $h0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+    ; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
+    ; CHECK: %reg1:gpr32sp = COPY $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
+    ; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s16) = COPY $h0
+    %reg1:gpr(s32) = COPY $w1
+    %ext:gpr(s32) = G_ZEXT %reg0(s16)
+    %cst:gpr(s32) = G_CONSTANT i32 2
+    %shift:gpr(s32) = G_SHL %ext, %cst(s32)
+    %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s32), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmp_arith_extended_shl_too_large
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $x1
+
+    ; The constant on the G_SHL is > 4, so we won't select SUBSXrx
+
+    ; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
+    ; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s64) = COPY $x1
+    %ext:gpr(s64) = G_ZEXT %reg0(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 5
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
@@ -89,3 +89,24 @@ body:             |
     %2:gpr(p0) = G_PTR_ADD %0, %1(s64)
     $x0 = COPY %2(p0)
 ...
+---
+name:            ptr_add_arith_extended
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+      liveins: $x0
+    ; CHECK-LABEL: name: ptr_add_arith_extended
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %ptr:gpr64 = COPY $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %ptr
+    ; CHECK: %ptr_add:gpr64sp = ADDXrx [[COPY]], %reg0, 18
+    ; CHECK: $x0 = COPY %ptr_add
+    %reg0:gpr(s32) = COPY $w0
+    %ptr:gpr(p0) = COPY $x1
+    %ext:gpr(s64) = G_ZEXT %reg0(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %ptr_add:gpr(p0) = G_PTR_ADD %ptr, %shift(s64)
+    $x0 = COPY %ptr_add(p0)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -136,3 +136,31 @@ body:             |
     %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
     $w0 = COPY %add(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddo_arith_extended
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $x0
+    ; Check that we get ADDSXrx.
+    ; CHECK-LABEL: name: uaddo_arith_extended
+    ; CHECK: liveins: $w0, $x0
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: %reg1:gpr32 = COPY $w0
+    ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
+    ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $x0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s32) = COPY $w0
+    %ext:gpr(s64) = G_ZEXT %reg1(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %add:gpr(s64), %flags:gpr(s1) = G_UADDO %reg0, %shift
+    $x0 = COPY %add(s64)
+    RET_ReallyLR implicit $x0