
Conversation

Il-Capitano
Contributor

Early if-conversion can create instruction sequences such as

```
mov  x1, #1
csel x0, x1, x2, eq
```

which could be simplified to

```
csinc x0, x2, xzr, ne
```

One notable example that generates code like this is `cmpxchg weak`.

This is fixed by treating an immediate value of 1 as `add(wzr, 1)`, so that the addition can be folded into the CSEL by using CSINC instead.
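
To see why the two sequences are equivalent, here is a minimal model of the CSINC semantics (a reference-style sketch in C++, not LLVM code):

```
#include <cstdint>

// CSINC Xd, Xn, Xm, cond  computes  Xd = cond ? Xn : Xm + 1.
uint64_t csinc(bool cond, uint64_t xn, uint64_t xm) {
  return cond ? xn : xm + 1;
}

// With Xm = XZR (always zero) and the condition inverted:
//   csinc x0, x2, xzr, ne  =>  x0 = ne ? x2 : 0 + 1
// which matches  mov x1, #1 ; csel x0, x1, x2, eq
// (x0 = eq ? 1 : x2), without materializing the constant.
```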

@llvmbot
Member

llvmbot commented Oct 11, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Csanád Hajdú (Il-Capitano)

Full diff: https://github.com/llvm/llvm-project/pull/162993.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+38-6)
  • (modified) llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll (+80)
  • (modified) llvm/test/CodeGen/AArch64/peephole-csel.ll (+2-3)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b8761d971a67d..578fd321b4873 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -708,8 +708,32 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
   unsigned Opc = 0;
-  unsigned SrcOpNum = 0;
+  unsigned SrcReg = 0;
   switch (DefMI->getOpcode()) {
+  case AArch64::SUBREG_TO_REG:
+    // Check for the following way to define a 64-bit immediate:
+    //   %0:gpr32 = MOVi32imm 1
+    //   %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+      return 0;
+    if (!DefMI->getOperand(2).isReg())
+      return 0;
+    if (!DefMI->getOperand(3).isImm() ||
+        DefMI->getOperand(3).getImm() != AArch64::sub_32)
+      return 0;
+    DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+    if (DefMI->getOpcode() != AArch64::MOVi32imm)
+      return 0;
+    // Fall through to the MOVi32imm case.
+    [[fallthrough]];
+  case AArch64::MOVi32imm:
+  case AArch64::MOVi64imm:
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+      return 0;
+    SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+    break;
+
   case AArch64::ADDSXri:
   case AArch64::ADDSWri:
     // if NZCV is used, do not fold.
@@ -724,7 +748,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
         DefMI->getOperand(3).getImm() != 0)
       return 0;
-    SrcOpNum = 1;
+    SrcReg = DefMI->getOperand(1).getReg();
     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     break;
 
@@ -734,7 +758,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     break;
   }
@@ -753,17 +777,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     break;
   }
   default:
     return 0;
   }
-  assert(Opc && SrcOpNum && "Missing parameters");
+  assert(Opc && SrcReg && "Missing parameters");
 
   if (NewVReg)
-    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+    *NewVReg = SrcReg;
   return Opc;
 }
 
@@ -976,6 +1000,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
 
     // Fold the operation. Leave any dead instructions for DCE to clean up.
     if (FoldedOpc) {
+      // NewVReg might be XZR/WZR. In that case create a COPY into a virtual
+      // register.
+      if (!Register::isVirtualRegister(NewVReg)) {
+        unsigned ZeroReg = NewVReg;
+        NewVReg = MRI.createVirtualRegister(RC);
+        BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewVReg).addReg(ZeroReg);
+      }
+
       FalseReg = NewVReg;
       Opc = FoldedOpc;
      // This extends the live range of NewVReg.
diff --git a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 97a7741bcde75..849323f0fedf3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -421,3 +421,83 @@ for.body51:                                       ; preds = %is_sbox.exit155
   unreachable
 }
 declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp
+
+; CHECK-LABEL: fold_imm1_csinc_32:
+; CHECK:      cmp w0, w1
+; CHECK-NEXT: csinc w0, w2, wzr, ge
+; CHECK-NEXT: ret
+define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ %n, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_csinc_64:
+; CHECK:      cmp x0, x1
+; CHECK-NEXT: csinc x0, x2, xzr, ge
+; CHECK-NEXT: ret
+define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ %n, %if.else ]
+  ret i64 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_32:
+; CHECK:      cmp w0, w1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ 0, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_64:
+; CHECK:      cmp x0, x1
+; CHECK-NEXT: cset x0, lt
+; CHECK-NEXT: ret
+define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ 0, %if.else ]
+  ret i64 %result
+}
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 868b9f1f2f6ac..b085258059b7e 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
 ; CHECK-LABEL: peephole_csel:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w2, #0x1
-; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    tst w1, #0x1
-; CHECK-NEXT:    csel x8, x8, x9, eq
+; CHECK-NEXT:    csinc x8, x8, xzr, ne
 ; CHECK-NEXT:    str x8, [x0]
 ; CHECK-NEXT:    ret
 entry:
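
As a usage-level illustration of the motivating `cmpxchg weak` case, a weak compare-exchange whose boolean success result is selected against another value is the kind of code that produced the `mov #1` + `csel` pattern folded here (a hedged sketch: `try_update` is an illustrative name, and the exact assembly depends on compiler version and flags):

```
#include <atomic>

// The success flag of a weak compare-exchange is materialized as 0/1.
// Early if-conversion previously selected the constant 1 with
//   mov w8, #1 ; csel ...
// which this patch folds into a single csinc against wzr/xzr.
bool try_update(std::atomic<long> &a, long expected, long desired) {
  return a.compare_exchange_weak(expected, desired);
}
```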

@citymarina
Contributor

citymarina commented Oct 15, 2025

Thanks for adding me but I don't know this area well enough to review. Hopefully @davemgreen or @nikic will have a chance to take a look.

@citymarina citymarina removed their request for review October 15, 2025 11:42
@nikic nikic requested review from paulwalker-arm and sdesmalen-arm and removed request for nikic October 15, 2025 13:53
* Remove fallthrough
* Rename NewVReg -> NewReg
* NewVReg -> NewReg in comments.
* Use WZR/XZR directly in the folded instruction.
@Il-Capitano
Contributor Author

The previous CI failures seem to be unrelated to my change, so I've rebased it. Once CI is all green, I'll merge it.

@Il-Capitano Il-Capitano merged commit 7d356e9 into llvm:main Oct 20, 2025
15 of 18 checks passed