Skip to content

Commit 43e36d8

Browse files
committed
[CodeGenPrepare] Transform shl X, cttz(Y) to mul (Y & -Y), X if cttz is unsupported
1 parent 0a519f4 commit 43e36d8

File tree

2 files changed

+124
-140
lines changed

2 files changed

+124
-140
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8433,7 +8433,29 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
84338433
return true;
84348434

84358435
switch (I->getOpcode()) {
8436-
case Instruction::Shl:
8436+
case Instruction::Shl: {
8437+
// shl X, cttz(Y) -> mul (Y & -Y), X if cttz is unsupported on the target.
// (Y & -Y) isolates the lowest set bit of Y, i.e. it equals 1 << cttz(Y).
8438+
Value *Y;
8439+
if (match(I->getOperand(1),
8440+
m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(Y))))) {
8441+
EVT VT = TLI->getValueType(*DL, Y->getType());
8442+
if (!TLI->isOperationLegalOrCustom(ISD::CTTZ, VT) &&
8443+
TLI->isOperationLegalOrCustom(ISD::MUL, VT)) {
8444+
IRBuilder<> Builder(I);
8445+
Value *NegY = Builder.CreateNeg(Y);
8446+
Value *Power2 = Builder.CreateAnd(Y, NegY);
8447+
Value *New = Builder.CreateMul(Power2, I->getOperand(0), "",
8448+
/*HasNUW=*/I->hasNoUnsignedWrap(),
8449+
/*HasNSW=*/false);
8450+
replaceAllUsesWith(I, New, FreshBBs, IsHugeFunc);
8451+
RecursivelyDeleteTriviallyDeadInstructions(
8452+
I, TLInfo, nullptr,
8453+
[&](Value *V) { removeAllAssertingVHReferences(V); });
8454+
return true;
8455+
}
8456+
}
8457+
}
8458+
[[fallthrough]];
84378459
case Instruction::LShr:
84388460
case Instruction::AShr:
84398461
return optimizeShiftInst(cast<BinaryOperator>(I));

llvm/test/CodeGen/RISCV/shl-cttz.ll

Lines changed: 101 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -383,15 +383,7 @@ define i32 @shl_cttz_i32(i32 %x, i32 %y) {
383383
; RV32I: # %bb.0: # %entry
384384
; RV32I-NEXT: neg a2, a1
385385
; RV32I-NEXT: and a1, a1, a2
386-
; RV32I-NEXT: lui a2, 30667
387-
; RV32I-NEXT: addi a2, a2, 1329
388-
; RV32I-NEXT: mul a1, a1, a2
389-
; RV32I-NEXT: srli a1, a1, 27
390-
; RV32I-NEXT: lui a2, %hi(.LCPI4_0)
391-
; RV32I-NEXT: addi a2, a2, %lo(.LCPI4_0)
392-
; RV32I-NEXT: add a1, a2, a1
393-
; RV32I-NEXT: lbu a1, 0(a1)
394-
; RV32I-NEXT: sll a0, a0, a1
386+
; RV32I-NEXT: mul a0, a1, a0
395387
; RV32I-NEXT: ret
396388
;
397389
; RV32ZBB-LABEL: shl_cttz_i32:
@@ -400,26 +392,33 @@ define i32 @shl_cttz_i32(i32 %x, i32 %y) {
400392
; RV32ZBB-NEXT: sll a0, a0, a1
401393
; RV32ZBB-NEXT: ret
402394
;
403-
; RV64I-LABEL: shl_cttz_i32:
404-
; RV64I: # %bb.0: # %entry
405-
; RV64I-NEXT: negw a2, a1
406-
; RV64I-NEXT: and a1, a1, a2
407-
; RV64I-NEXT: lui a2, 30667
408-
; RV64I-NEXT: addi a2, a2, 1329
409-
; RV64I-NEXT: mul a1, a1, a2
410-
; RV64I-NEXT: srliw a1, a1, 27
411-
; RV64I-NEXT: lui a2, %hi(.LCPI4_0)
412-
; RV64I-NEXT: addi a2, a2, %lo(.LCPI4_0)
413-
; RV64I-NEXT: add a1, a2, a1
414-
; RV64I-NEXT: lbu a1, 0(a1)
415-
; RV64I-NEXT: sllw a0, a0, a1
416-
; RV64I-NEXT: ret
395+
; RV64IILLEGALI32-LABEL: shl_cttz_i32:
396+
; RV64IILLEGALI32: # %bb.0: # %entry
397+
; RV64IILLEGALI32-NEXT: negw a2, a1
398+
; RV64IILLEGALI32-NEXT: and a1, a1, a2
399+
; RV64IILLEGALI32-NEXT: lui a2, 30667
400+
; RV64IILLEGALI32-NEXT: addi a2, a2, 1329
401+
; RV64IILLEGALI32-NEXT: mul a1, a1, a2
402+
; RV64IILLEGALI32-NEXT: srliw a1, a1, 27
403+
; RV64IILLEGALI32-NEXT: lui a2, %hi(.LCPI4_0)
404+
; RV64IILLEGALI32-NEXT: addi a2, a2, %lo(.LCPI4_0)
405+
; RV64IILLEGALI32-NEXT: add a1, a2, a1
406+
; RV64IILLEGALI32-NEXT: lbu a1, 0(a1)
407+
; RV64IILLEGALI32-NEXT: sllw a0, a0, a1
408+
; RV64IILLEGALI32-NEXT: ret
417409
;
418410
; RV64ZBB-LABEL: shl_cttz_i32:
419411
; RV64ZBB: # %bb.0: # %entry
420412
; RV64ZBB-NEXT: ctzw a1, a1
421413
; RV64ZBB-NEXT: sllw a0, a0, a1
422414
; RV64ZBB-NEXT: ret
415+
;
416+
; RV64ILEGALI32-LABEL: shl_cttz_i32:
417+
; RV64ILEGALI32: # %bb.0: # %entry
418+
; RV64ILEGALI32-NEXT: negw a2, a1
419+
; RV64ILEGALI32-NEXT: and a1, a1, a2
420+
; RV64ILEGALI32-NEXT: mulw a0, a1, a0
421+
; RV64ILEGALI32-NEXT: ret
423422
entry:
424423
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
425424
%res = shl i32 %x, %cttz
@@ -431,16 +430,7 @@ define i32 @shl_cttz_constant_i32(i32 %y) {
431430
; RV32I: # %bb.0: # %entry
432431
; RV32I-NEXT: neg a1, a0
433432
; RV32I-NEXT: and a0, a0, a1
434-
; RV32I-NEXT: lui a1, 30667
435-
; RV32I-NEXT: addi a1, a1, 1329
436-
; RV32I-NEXT: mul a0, a0, a1
437-
; RV32I-NEXT: srli a0, a0, 27
438-
; RV32I-NEXT: lui a1, %hi(.LCPI5_0)
439-
; RV32I-NEXT: addi a1, a1, %lo(.LCPI5_0)
440-
; RV32I-NEXT: add a0, a1, a0
441-
; RV32I-NEXT: lbu a0, 0(a0)
442-
; RV32I-NEXT: li a1, 4
443-
; RV32I-NEXT: sll a0, a1, a0
433+
; RV32I-NEXT: slli a0, a0, 2
444434
; RV32I-NEXT: ret
445435
;
446436
; RV32ZBB-LABEL: shl_cttz_constant_i32:
@@ -450,28 +440,35 @@ define i32 @shl_cttz_constant_i32(i32 %y) {
450440
; RV32ZBB-NEXT: sll a0, a1, a0
451441
; RV32ZBB-NEXT: ret
452442
;
453-
; RV64I-LABEL: shl_cttz_constant_i32:
454-
; RV64I: # %bb.0: # %entry
455-
; RV64I-NEXT: negw a1, a0
456-
; RV64I-NEXT: and a0, a0, a1
457-
; RV64I-NEXT: lui a1, 30667
458-
; RV64I-NEXT: addi a1, a1, 1329
459-
; RV64I-NEXT: mul a0, a0, a1
460-
; RV64I-NEXT: srliw a0, a0, 27
461-
; RV64I-NEXT: lui a1, %hi(.LCPI5_0)
462-
; RV64I-NEXT: addi a1, a1, %lo(.LCPI5_0)
463-
; RV64I-NEXT: add a0, a1, a0
464-
; RV64I-NEXT: lbu a0, 0(a0)
465-
; RV64I-NEXT: li a1, 4
466-
; RV64I-NEXT: sllw a0, a1, a0
467-
; RV64I-NEXT: ret
443+
; RV64IILLEGALI32-LABEL: shl_cttz_constant_i32:
444+
; RV64IILLEGALI32: # %bb.0: # %entry
445+
; RV64IILLEGALI32-NEXT: negw a1, a0
446+
; RV64IILLEGALI32-NEXT: and a0, a0, a1
447+
; RV64IILLEGALI32-NEXT: lui a1, 30667
448+
; RV64IILLEGALI32-NEXT: addi a1, a1, 1329
449+
; RV64IILLEGALI32-NEXT: mul a0, a0, a1
450+
; RV64IILLEGALI32-NEXT: srliw a0, a0, 27
451+
; RV64IILLEGALI32-NEXT: lui a1, %hi(.LCPI5_0)
452+
; RV64IILLEGALI32-NEXT: addi a1, a1, %lo(.LCPI5_0)
453+
; RV64IILLEGALI32-NEXT: add a0, a1, a0
454+
; RV64IILLEGALI32-NEXT: lbu a0, 0(a0)
455+
; RV64IILLEGALI32-NEXT: li a1, 4
456+
; RV64IILLEGALI32-NEXT: sllw a0, a1, a0
457+
; RV64IILLEGALI32-NEXT: ret
468458
;
469459
; RV64ZBB-LABEL: shl_cttz_constant_i32:
470460
; RV64ZBB: # %bb.0: # %entry
471461
; RV64ZBB-NEXT: ctzw a0, a0
472462
; RV64ZBB-NEXT: li a1, 4
473463
; RV64ZBB-NEXT: sllw a0, a1, a0
474464
; RV64ZBB-NEXT: ret
465+
;
466+
; RV64ILEGALI32-LABEL: shl_cttz_constant_i32:
467+
; RV64ILEGALI32: # %bb.0: # %entry
468+
; RV64ILEGALI32-NEXT: negw a1, a0
469+
; RV64ILEGALI32-NEXT: and a0, a0, a1
470+
; RV64ILEGALI32-NEXT: slliw a0, a0, 2
471+
; RV64ILEGALI32-NEXT: ret
475472
entry:
476473
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
477474
%res = shl i32 4, %cttz
@@ -483,15 +480,7 @@ define i32 @shl_cttz_nuw_i32(i32 %x, i32 %y) {
483480
; RV32I: # %bb.0: # %entry
484481
; RV32I-NEXT: neg a2, a1
485482
; RV32I-NEXT: and a1, a1, a2
486-
; RV32I-NEXT: lui a2, 30667
487-
; RV32I-NEXT: addi a2, a2, 1329
488-
; RV32I-NEXT: mul a1, a1, a2
489-
; RV32I-NEXT: srli a1, a1, 27
490-
; RV32I-NEXT: lui a2, %hi(.LCPI6_0)
491-
; RV32I-NEXT: addi a2, a2, %lo(.LCPI6_0)
492-
; RV32I-NEXT: add a1, a2, a1
493-
; RV32I-NEXT: lbu a1, 0(a1)
494-
; RV32I-NEXT: sll a0, a0, a1
483+
; RV32I-NEXT: mul a0, a1, a0
495484
; RV32I-NEXT: ret
496485
;
497486
; RV32ZBB-LABEL: shl_cttz_nuw_i32:
@@ -500,26 +489,33 @@ define i32 @shl_cttz_nuw_i32(i32 %x, i32 %y) {
500489
; RV32ZBB-NEXT: sll a0, a0, a1
501490
; RV32ZBB-NEXT: ret
502491
;
503-
; RV64I-LABEL: shl_cttz_nuw_i32:
504-
; RV64I: # %bb.0: # %entry
505-
; RV64I-NEXT: negw a2, a1
506-
; RV64I-NEXT: and a1, a1, a2
507-
; RV64I-NEXT: lui a2, 30667
508-
; RV64I-NEXT: addi a2, a2, 1329
509-
; RV64I-NEXT: mul a1, a1, a2
510-
; RV64I-NEXT: srliw a1, a1, 27
511-
; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
512-
; RV64I-NEXT: addi a2, a2, %lo(.LCPI6_0)
513-
; RV64I-NEXT: add a1, a2, a1
514-
; RV64I-NEXT: lbu a1, 0(a1)
515-
; RV64I-NEXT: sllw a0, a0, a1
516-
; RV64I-NEXT: ret
492+
; RV64IILLEGALI32-LABEL: shl_cttz_nuw_i32:
493+
; RV64IILLEGALI32: # %bb.0: # %entry
494+
; RV64IILLEGALI32-NEXT: negw a2, a1
495+
; RV64IILLEGALI32-NEXT: and a1, a1, a2
496+
; RV64IILLEGALI32-NEXT: lui a2, 30667
497+
; RV64IILLEGALI32-NEXT: addi a2, a2, 1329
498+
; RV64IILLEGALI32-NEXT: mul a1, a1, a2
499+
; RV64IILLEGALI32-NEXT: srliw a1, a1, 27
500+
; RV64IILLEGALI32-NEXT: lui a2, %hi(.LCPI6_0)
501+
; RV64IILLEGALI32-NEXT: addi a2, a2, %lo(.LCPI6_0)
502+
; RV64IILLEGALI32-NEXT: add a1, a2, a1
503+
; RV64IILLEGALI32-NEXT: lbu a1, 0(a1)
504+
; RV64IILLEGALI32-NEXT: sllw a0, a0, a1
505+
; RV64IILLEGALI32-NEXT: ret
517506
;
518507
; RV64ZBB-LABEL: shl_cttz_nuw_i32:
519508
; RV64ZBB: # %bb.0: # %entry
520509
; RV64ZBB-NEXT: ctzw a1, a1
521510
; RV64ZBB-NEXT: sllw a0, a0, a1
522511
; RV64ZBB-NEXT: ret
512+
;
513+
; RV64ILEGALI32-LABEL: shl_cttz_nuw_i32:
514+
; RV64ILEGALI32: # %bb.0: # %entry
515+
; RV64ILEGALI32-NEXT: negw a2, a1
516+
; RV64ILEGALI32-NEXT: and a1, a1, a2
517+
; RV64ILEGALI32-NEXT: mulw a0, a1, a0
518+
; RV64ILEGALI32-NEXT: ret
523519
entry:
524520
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
525521
%res = shl nuw i32 %x, %cttz
@@ -531,15 +527,7 @@ define i32 @shl_cttz_nsw_i32(i32 %x, i32 %y) {
531527
; RV32I: # %bb.0: # %entry
532528
; RV32I-NEXT: neg a2, a1
533529
; RV32I-NEXT: and a1, a1, a2
534-
; RV32I-NEXT: lui a2, 30667
535-
; RV32I-NEXT: addi a2, a2, 1329
536-
; RV32I-NEXT: mul a1, a1, a2
537-
; RV32I-NEXT: srli a1, a1, 27
538-
; RV32I-NEXT: lui a2, %hi(.LCPI7_0)
539-
; RV32I-NEXT: addi a2, a2, %lo(.LCPI7_0)
540-
; RV32I-NEXT: add a1, a2, a1
541-
; RV32I-NEXT: lbu a1, 0(a1)
542-
; RV32I-NEXT: sll a0, a0, a1
530+
; RV32I-NEXT: mul a0, a1, a0
543531
; RV32I-NEXT: ret
544532
;
545533
; RV32ZBB-LABEL: shl_cttz_nsw_i32:
@@ -548,26 +536,33 @@ define i32 @shl_cttz_nsw_i32(i32 %x, i32 %y) {
548536
; RV32ZBB-NEXT: sll a0, a0, a1
549537
; RV32ZBB-NEXT: ret
550538
;
551-
; RV64I-LABEL: shl_cttz_nsw_i32:
552-
; RV64I: # %bb.0: # %entry
553-
; RV64I-NEXT: negw a2, a1
554-
; RV64I-NEXT: and a1, a1, a2
555-
; RV64I-NEXT: lui a2, 30667
556-
; RV64I-NEXT: addi a2, a2, 1329
557-
; RV64I-NEXT: mul a1, a1, a2
558-
; RV64I-NEXT: srliw a1, a1, 27
559-
; RV64I-NEXT: lui a2, %hi(.LCPI7_0)
560-
; RV64I-NEXT: addi a2, a2, %lo(.LCPI7_0)
561-
; RV64I-NEXT: add a1, a2, a1
562-
; RV64I-NEXT: lbu a1, 0(a1)
563-
; RV64I-NEXT: sllw a0, a0, a1
564-
; RV64I-NEXT: ret
539+
; RV64IILLEGALI32-LABEL: shl_cttz_nsw_i32:
540+
; RV64IILLEGALI32: # %bb.0: # %entry
541+
; RV64IILLEGALI32-NEXT: negw a2, a1
542+
; RV64IILLEGALI32-NEXT: and a1, a1, a2
543+
; RV64IILLEGALI32-NEXT: lui a2, 30667
544+
; RV64IILLEGALI32-NEXT: addi a2, a2, 1329
545+
; RV64IILLEGALI32-NEXT: mul a1, a1, a2
546+
; RV64IILLEGALI32-NEXT: srliw a1, a1, 27
547+
; RV64IILLEGALI32-NEXT: lui a2, %hi(.LCPI7_0)
548+
; RV64IILLEGALI32-NEXT: addi a2, a2, %lo(.LCPI7_0)
549+
; RV64IILLEGALI32-NEXT: add a1, a2, a1
550+
; RV64IILLEGALI32-NEXT: lbu a1, 0(a1)
551+
; RV64IILLEGALI32-NEXT: sllw a0, a0, a1
552+
; RV64IILLEGALI32-NEXT: ret
565553
;
566554
; RV64ZBB-LABEL: shl_cttz_nsw_i32:
567555
; RV64ZBB: # %bb.0: # %entry
568556
; RV64ZBB-NEXT: ctzw a1, a1
569557
; RV64ZBB-NEXT: sllw a0, a0, a1
570558
; RV64ZBB-NEXT: ret
559+
;
560+
; RV64ILEGALI32-LABEL: shl_cttz_nsw_i32:
561+
; RV64ILEGALI32: # %bb.0: # %entry
562+
; RV64ILEGALI32-NEXT: negw a2, a1
563+
; RV64ILEGALI32-NEXT: and a1, a1, a2
564+
; RV64ILEGALI32-NEXT: mulw a0, a1, a0
565+
; RV64ILEGALI32-NEXT: ret
571566
entry:
572567
%cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true)
573568
%res = shl nsw i32 %x, %cttz
@@ -754,17 +749,9 @@ define i64 @shl_cttz_i64(i64 %x, i64 %y) {
754749
;
755750
; RV64I-LABEL: shl_cttz_i64:
756751
; RV64I: # %bb.0: # %entry
757-
; RV64I-NEXT: lui a2, %hi(.LCPI9_0)
758-
; RV64I-NEXT: ld a2, %lo(.LCPI9_0)(a2)
759-
; RV64I-NEXT: neg a3, a1
760-
; RV64I-NEXT: and a1, a1, a3
761-
; RV64I-NEXT: mul a1, a1, a2
762-
; RV64I-NEXT: srli a1, a1, 58
763-
; RV64I-NEXT: lui a2, %hi(.LCPI9_1)
764-
; RV64I-NEXT: addi a2, a2, %lo(.LCPI9_1)
765-
; RV64I-NEXT: add a1, a2, a1
766-
; RV64I-NEXT: lbu a1, 0(a1)
767-
; RV64I-NEXT: sll a0, a0, a1
752+
; RV64I-NEXT: neg a2, a1
753+
; RV64I-NEXT: and a1, a1, a2
754+
; RV64I-NEXT: mul a0, a1, a0
768755
; RV64I-NEXT: ret
769756
;
770757
; RV64ZBB-LABEL: shl_cttz_i64:
@@ -847,18 +834,9 @@ define i64 @shl_cttz_constant_i64(i64 %y) {
847834
;
848835
; RV64I-LABEL: shl_cttz_constant_i64:
849836
; RV64I: # %bb.0: # %entry
850-
; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
851-
; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1)
852-
; RV64I-NEXT: neg a2, a0
853-
; RV64I-NEXT: and a0, a0, a2
854-
; RV64I-NEXT: mul a0, a0, a1
855-
; RV64I-NEXT: srli a0, a0, 58
856-
; RV64I-NEXT: lui a1, %hi(.LCPI10_1)
857-
; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1)
858-
; RV64I-NEXT: add a0, a1, a0
859-
; RV64I-NEXT: lbu a0, 0(a0)
860-
; RV64I-NEXT: li a1, 4
861-
; RV64I-NEXT: sll a0, a1, a0
837+
; RV64I-NEXT: neg a1, a0
838+
; RV64I-NEXT: and a0, a0, a1
839+
; RV64I-NEXT: slli a0, a0, 2
862840
; RV64I-NEXT: ret
863841
;
864842
; RV64ZBB-LABEL: shl_cttz_constant_i64:
@@ -944,17 +922,9 @@ define i64 @shl_cttz_nuw_i64(i64 %x, i64 %y) {
944922
;
945923
; RV64I-LABEL: shl_cttz_nuw_i64:
946924
; RV64I: # %bb.0: # %entry
947-
; RV64I-NEXT: lui a2, %hi(.LCPI11_0)
948-
; RV64I-NEXT: ld a2, %lo(.LCPI11_0)(a2)
949-
; RV64I-NEXT: neg a3, a1
950-
; RV64I-NEXT: and a1, a1, a3
951-
; RV64I-NEXT: mul a1, a1, a2
952-
; RV64I-NEXT: srli a1, a1, 58
953-
; RV64I-NEXT: lui a2, %hi(.LCPI11_1)
954-
; RV64I-NEXT: addi a2, a2, %lo(.LCPI11_1)
955-
; RV64I-NEXT: add a1, a2, a1
956-
; RV64I-NEXT: lbu a1, 0(a1)
957-
; RV64I-NEXT: sll a0, a0, a1
925+
; RV64I-NEXT: neg a2, a1
926+
; RV64I-NEXT: and a1, a1, a2
927+
; RV64I-NEXT: mul a0, a1, a0
958928
; RV64I-NEXT: ret
959929
;
960930
; RV64ZBB-LABEL: shl_cttz_nuw_i64:
@@ -1039,17 +1009,9 @@ define i64 @shl_cttz_nsw_i64(i64 %x, i64 %y) {
10391009
;
10401010
; RV64I-LABEL: shl_cttz_nsw_i64:
10411011
; RV64I: # %bb.0: # %entry
1042-
; RV64I-NEXT: lui a2, %hi(.LCPI12_0)
1043-
; RV64I-NEXT: ld a2, %lo(.LCPI12_0)(a2)
1044-
; RV64I-NEXT: neg a3, a1
1045-
; RV64I-NEXT: and a1, a1, a3
1046-
; RV64I-NEXT: mul a1, a1, a2
1047-
; RV64I-NEXT: srli a1, a1, 58
1048-
; RV64I-NEXT: lui a2, %hi(.LCPI12_1)
1049-
; RV64I-NEXT: addi a2, a2, %lo(.LCPI12_1)
1050-
; RV64I-NEXT: add a1, a2, a1
1051-
; RV64I-NEXT: lbu a1, 0(a1)
1052-
; RV64I-NEXT: sll a0, a0, a1
1012+
; RV64I-NEXT: neg a2, a1
1013+
; RV64I-NEXT: and a1, a1, a2
1014+
; RV64I-NEXT: mul a0, a1, a0
10531015
; RV64I-NEXT: ret
10541016
;
10551017
; RV64ZBB-LABEL: shl_cttz_nsw_i64:

0 commit comments

Comments
 (0)