[AArch64][GlobalISel] Support for folding G_ROTR as shifted operands.

aemerson · aemerson · commit 6d9505b8e010 · 2021-09-02T21:37:24.000-07:00
This allows selection like: eor w0, w1, w2, ror #8 Saves 500 bytes on ClamAV -Os, which is 0.1%. Differential Revision: https://reviews.llvm.org/D109206
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -379,18 +379,15 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectAddrModeWRO(Root, Width / 8);
   }
 
-  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
+  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
+                                           bool AllowROR = false) const;
 
   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
     return selectShiftedRegister(Root);
   }
 
   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
-    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
-    // For now, make them the same. The only difference between the two is that
-    // logical shifts are allowed to fold in rotates. Otherwise, these are
-    // functionally the same.
-    return selectShiftedRegister(Root);
+    return selectShiftedRegister(Root, true);
   }
 
   /// Given an extend instruction, determine the correct shift-extend type for
@@ -6014,7 +6011,6 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
 /// Given a shift instruction, return the correct shift type for that
 /// instruction.
 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
-  // TODO: Handle AArch64_AM::ROR
   switch (MI.getOpcode()) {
   default:
     return AArch64_AM::InvalidShiftExtend;
@@ -6024,30 +6020,31 @@ static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
     return AArch64_AM::LSR;
   case TargetOpcode::G_ASHR:
     return AArch64_AM::ASR;
+  case TargetOpcode::G_ROTR:
+    return AArch64_AM::ROR;
   }
 }
 
 /// Select a "shifted register" operand. If the value is not shifted, set the
 /// shift operand to a default value of "lsl 0".
-///
-/// TODO: Allow shifted register to be rotated in logical instructions.
 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
+AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
+                                                  bool AllowROR) const {
   if (!Root.isReg())
     return None;
   MachineRegisterInfo &MRI =
       Root.getParent()->getParent()->getParent()->getRegInfo();
 
   // Check if the operand is defined by an instruction which corresponds to
   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
-  //
-  // TODO: Handle AArch64_AM::ROR for logical instructions.
   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
   if (!ShiftInst)
     return None;
   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
   if (ShType == AArch64_AM::InvalidShiftExtend)
     return None;
+  if (ShType == AArch64_AM::ROR && !AllowROR)
+    return None;
   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
     return None;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-rotate.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-rotate.mir
@@ -0,0 +1,67 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
+---
+name:            fold_ror_eor
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; Our codegen differs from SDAG here, we decide to fold in LHS
+    ; operand instead of RHS since they're both rotates and XOR is commutative.
+    ; Either is valid.
+
+    ; CHECK-LABEL: name: fold_ror_eor
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[EXTRWrri:%[0-9]+]]:gpr32 = EXTRWrri [[COPY]], [[COPY]], 11
+    ; CHECK: [[EORWrs:%[0-9]+]]:gpr32 = EORWrs [[EXTRWrri]], [[COPY]], 198
+    ; CHECK: $w0 = COPY [[EORWrs]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %13:gpr(s64) = G_CONSTANT i64 6
+    %2:gpr(s32) = G_ROTR %0, %13(s64)
+    %14:gpr(s64) = G_CONSTANT i64 11
+    %4:gpr(s32) = G_ROTR %0, %14(s64)
+    %5:gpr(s32) = G_XOR %2, %4
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            fold_ror_eor_rhs_only
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$w1' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: fold_ror_eor_rhs_only
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[EORWrs:%[0-9]+]]:gpr32 = EORWrs [[COPY1]], [[COPY]], 198
+    ; CHECK: $w0 = COPY [[EORWrs]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %9:gpr(s64) = G_CONSTANT i64 6
+    %3:gpr(s32) = G_ROTR %0, %9(s64)
+    %4:gpr(s32) = G_XOR %1, %3
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...