Skip to content

Commit d1452bd

Browse files
committed
[TailDup] Allow large number of predecessors/successors without phis.
This adjusts the threshold logic added in llvm#78582 to only trigger for cases where there are actually phis to duplicate in either TailBB or in one of the successors. In cases where there are no phis, we only have to pay the cost of extra edges, but have no explosion in PHI-related instructions. This improves performance of Python on some inputs by 2-3% on Apple Silicon CPUs.
1 parent 9174b54 commit d1452bd

File tree

2 files changed

+47
-36
lines changed

2 files changed

+47
-36
lines changed

llvm/lib/CodeGen/TailDuplicator.cpp

+15-8
Original file line numberDiff line numberDiff line change
@@ -573,14 +573,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
573573
if (TailBB.isSuccessor(&TailBB))
574574
return false;
575575

576-
// Duplicating a BB which has both multiple predecessors and successors will
577-
// result in a complex CFG and also may cause huge amount of PHI nodes. If we
578-
// want to remove this limitation, we have to address
579-
// https://github.com/llvm/llvm-project/issues/78578.
580-
if (TailBB.pred_size() > TailDupPredSize &&
581-
TailBB.succ_size() > TailDupSuccSize)
582-
return false;
583-
584576
// Set the limit on the cost to duplicate. When optimizing for size,
585577
// duplicate only one, because one branch instruction can be eliminated to
586578
// compensate for the duplication.
@@ -618,6 +610,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
618610
// Check the instructions in the block to determine whether tail-duplication
619611
// is invalid or unlikely to be profitable.
620612
unsigned InstrCount = 0;
613+
unsigned NumPhis = 0;
621614
for (MachineInstr &MI : TailBB) {
622615
// Non-duplicable things shouldn't be tail-duplicated.
623616
// CFI instructions are marked as non-duplicable, because Darwin compact
@@ -661,6 +654,20 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
661654

662655
if (InstrCount > MaxDuplicateCount)
663656
return false;
657+
NumPhis += MI.isPHI();
658+
}
659+
660+
// Duplicating a BB which has both multiple predecessors and successors
661+
// may cause huge amount of PHI nodes. If we want to remove this limitation,
662+
// we have to address https://github.com/llvm/llvm-project/issues/78578.
663+
if (TailBB.pred_size() > TailDupPredSize &&
664+
TailBB.succ_size() > TailDupSuccSize) {
665+
// If TailBB or any of its successors contains a phi, we may have to add a
666+
// large number of additional phis with additional incoming values.
667+
if (NumPhis != 0 || any_of(TailBB.successors(), [](MachineBasicBlock *MBB) {
668+
return any_of(*MBB, [](MachineInstr &MI) { return MI.isPHI(); });
669+
}))
670+
return false;
664671
}
665672

666673
// Check if any of the successors of TailBB has a PHI node in which the

llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir

+32-28
Original file line numberDiff line numberDiff line change
@@ -538,43 +538,47 @@ body: |
538538
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
539539
; LIMIT-NEXT: {{ $}}
540540
; LIMIT-NEXT: bb.2:
541-
; LIMIT-NEXT: successors: %bb.7(0x80000000)
541+
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
542542
; LIMIT-NEXT: {{ $}}
543543
; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
544-
; LIMIT-NEXT: JMP_1 %bb.7
544+
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
545+
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
546+
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
547+
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
545548
; LIMIT-NEXT: {{ $}}
546549
; LIMIT-NEXT: bb.3:
547-
; LIMIT-NEXT: successors: %bb.7(0x80000000)
550+
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
548551
; LIMIT-NEXT: {{ $}}
549552
; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
550-
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
551-
; LIMIT-NEXT: JMP_1 %bb.7
553+
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
554+
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
555+
; LIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
556+
; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
557+
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg
552558
; LIMIT-NEXT: {{ $}}
553559
; LIMIT-NEXT: bb.4:
554-
; LIMIT-NEXT: successors: %bb.7(0x80000000)
560+
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
555561
; LIMIT-NEXT: {{ $}}
556562
; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
557-
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
558-
; LIMIT-NEXT: JMP_1 %bb.7
563+
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
564+
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
565+
; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
566+
; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
567+
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg
559568
; LIMIT-NEXT: {{ $}}
560569
; LIMIT-NEXT: bb.5:
561-
; LIMIT-NEXT: successors: %bb.7(0x80000000)
570+
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
562571
; LIMIT-NEXT: {{ $}}
563572
; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
564-
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
565-
; LIMIT-NEXT: JMP_1 %bb.7
573+
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
574+
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
575+
; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
576+
; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
577+
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg
566578
; LIMIT-NEXT: {{ $}}
567579
; LIMIT-NEXT: bb.6:
568580
; LIMIT-NEXT: successors:
569581
; LIMIT-NEXT: {{ $}}
570-
; LIMIT-NEXT: bb.7:
571-
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
572-
; LIMIT-NEXT: {{ $}}
573-
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
574-
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
575-
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
576-
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
577-
; LIMIT-NEXT: {{ $}}
578582
; LIMIT-NEXT: bb.9:
579583
; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
580584
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV32rm4]] :: (store (s32))
@@ -583,23 +587,23 @@ body: |
583587
; LIMIT-NEXT: {{ $}}
584588
; LIMIT-NEXT: bb.10:
585589
; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
586-
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
587-
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri5]] :: (store (s32))
588-
; LIMIT-NEXT: $eax = COPY [[SHR32ri5]]
590+
; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
591+
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri8]] :: (store (s32))
592+
; LIMIT-NEXT: $eax = COPY [[SHR32ri8]]
589593
; LIMIT-NEXT: RET 0, $eax
590594
; LIMIT-NEXT: {{ $}}
591595
; LIMIT-NEXT: bb.11:
592596
; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
593-
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
594-
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri6]] :: (store (s32))
595-
; LIMIT-NEXT: $eax = COPY [[SHR32ri6]]
597+
; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
598+
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri9]] :: (store (s32))
599+
; LIMIT-NEXT: $eax = COPY [[SHR32ri9]]
596600
; LIMIT-NEXT: RET 0, $eax
597601
; LIMIT-NEXT: {{ $}}
598602
; LIMIT-NEXT: bb.12:
599603
; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
600-
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
601-
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri7]] :: (store (s32))
602-
; LIMIT-NEXT: $eax = COPY [[SHR32ri7]]
604+
; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
605+
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri10]] :: (store (s32))
606+
; LIMIT-NEXT: $eax = COPY [[SHR32ri10]]
603607
; LIMIT-NEXT: RET 0, $eax
604608
;
605609
; NOLIMIT-LABEL: name: foo_no_phis

0 commit comments

Comments
 (0)