Skip to content

Commit 029a46b

Browse files
SC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:4f56d47d050e into amd-gfx:7cd9c5eed736
Local branch amd-gfx 7cd9c5e Merged main:4aeb7a0f5e35 into amd-gfx:c48ae1cfb717 Remote branch main 4f56d47 [VPlan] Make ExpandedSCEVs argument const (NFC).
2 parents 7cd9c5e + 4f56d47 commit 029a46b

File tree

6 files changed

+54
-47
lines changed

6 files changed

+54
-47
lines changed

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 478281
19+
#define LLVM_MAIN_REVISION 478283
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ class LoopVectorizationPlanner {
354354
executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
355355
InnerLoopVectorizer &LB, DominatorTree *DT,
356356
bool IsEpilogueVectorization,
357-
DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
357+
const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
358358

359359
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
360360
void printPlans(raw_ostream &O);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7644,7 +7644,7 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
76447644
SCEV2ValueTy LoopVectorizationPlanner::executePlan(
76457645
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
76467646
InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization,
7647-
DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
7647+
const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
76487648
assert(BestVPlan.hasVF(BestVF) &&
76497649
"Trying to execute plan with unsupported VF");
76507650
assert(BestVPlan.hasUF(BestUF) &&

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -812,23 +812,48 @@ static bool isConstantOne(VPValue *V) {
812812
static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
813813
if (auto *WidenR = dyn_cast<VPWidenRecipe>(&R))
814814
return WidenR->getUnderlyingInstr()->getOpcode();
815+
if (auto *WidenC = dyn_cast<VPWidenCastRecipe>(&R))
816+
return WidenC->getOpcode();
815817
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
816818
return RepR->getUnderlyingInstr()->getOpcode();
817819
if (auto *VPI = dyn_cast<VPInstruction>(&R))
818820
return VPI->getOpcode();
819821
return 0;
820822
}
821823

824+
/// Return the scalar type of \p VPV: the result type for a widened cast, otherwise the type of its underlying IR value.
825+
static Type *getTypeForVPValue(VPValue *VPV) {
826+
// TODO: Replace with VPlan type inference once ready.
827+
if (auto *VPC = dyn_cast<VPWidenCastRecipe>(VPV))
828+
return VPC->getResultType();
829+
auto *UV = VPV->getUnderlyingValue();
830+
return UV->getType();
831+
}
832+
822833
/// Try to simplify recipe \p R.
823834
static void simplifyRecipe(VPRecipeBase &R) {
824-
unsigned Opcode = getOpcodeForRecipe(R);
825-
if (Opcode == Instruction::Mul) {
835+
switch (getOpcodeForRecipe(R)) {
836+
case Instruction::Mul: {
826837
VPValue *A = R.getOperand(0);
827838
VPValue *B = R.getOperand(1);
828839
if (isConstantOne(A))
829840
return R.getVPSingleValue()->replaceAllUsesWith(B);
830841
if (isConstantOne(B))
831842
return R.getVPSingleValue()->replaceAllUsesWith(A);
843+
break;
844+
}
845+
case Instruction::Trunc: {
846+
VPRecipeBase *Zext = R.getOperand(0)->getDefiningRecipe();
847+
if (!Zext || getOpcodeForRecipe(*Zext) != Instruction::ZExt)
848+
break;
849+
VPValue *A = Zext->getOperand(0);
850+
VPValue *Trunc = R.getVPSingleValue();
851+
if (getTypeForVPValue(Trunc) == getTypeForVPValue(A))
852+
Trunc->replaceAllUsesWith(A);
853+
break;
854+
}
855+
default:
856+
break;
832857
}
833858
}
834859

llvm/test/Transforms/LoopVectorize/if-pred-stores.ll

Lines changed: 23 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -432,17 +432,13 @@ define void @minimal_bit_widths(i1 %c) {
432432
; UNROLL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
433433
; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]]
434434
; UNROLL: pred.store.if:
435-
; UNROLL-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32
436-
; UNROLL-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
437-
; UNROLL-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1
438-
; UNROLL-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32
439-
; UNROLL-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8
440-
; UNROLL-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1
435+
; UNROLL-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
436+
; UNROLL-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
441437
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]]
442438
; UNROLL: pred.store.continue3:
443439
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
444-
; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
445-
; UNROLL-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
440+
; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
441+
; UNROLL-NEXT: br i1 [[TMP6]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
446442
; UNROLL: for.end:
447443
; UNROLL-NEXT: ret void
448444
;
@@ -461,21 +457,17 @@ define void @minimal_bit_widths(i1 %c) {
461457
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
462458
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
463459
; UNROLL-NOSIMPLIFY: pred.store.if:
464-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32
465-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
466-
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1
460+
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
467461
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
468462
; UNROLL-NOSIMPLIFY: pred.store.continue:
469463
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
470464
; UNROLL-NOSIMPLIFY: pred.store.if2:
471-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32
472-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8
473-
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1
465+
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
474466
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
475467
; UNROLL-NOSIMPLIFY: pred.store.continue3:
476468
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
477-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
478-
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
469+
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
470+
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
479471
; UNROLL-NOSIMPLIFY: middle.block:
480472
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
481473
; UNROLL-NOSIMPLIFY: scalar.ph:
@@ -515,27 +507,23 @@ define void @minimal_bit_widths(i1 %c) {
515507
; VEC-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
516508
; VEC-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
517509
; VEC: pred.store.if:
518-
; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
519-
; VEC-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
520-
; VEC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
521-
; VEC-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP5]] to i8
522-
; VEC-NEXT: store i8 [[TMP7]], ptr [[TMP6]], align 1
510+
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
511+
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
512+
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
523513
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
524514
; VEC: pred.store.continue:
525-
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
526-
; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
515+
; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
516+
; VEC-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
527517
; VEC: pred.store.if2:
528-
; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
529-
; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
530-
; VEC-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
531-
; VEC-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr undef, i64 [[TMP9]]
532-
; VEC-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP11]] to i8
533-
; VEC-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1
518+
; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
519+
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr undef, i64 [[TMP7]]
520+
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
521+
; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
534522
; VEC-NEXT: br label [[PRED_STORE_CONTINUE3]]
535523
; VEC: pred.store.continue3:
536524
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
537-
; VEC-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
538-
; VEC-NEXT: br i1 [[TMP14]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
525+
; VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
526+
; VEC-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
539527
; VEC: for.end:
540528
; VEC-NEXT: ret void
541529
;
@@ -606,21 +594,17 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
606594
; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1
607595
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
608596
; UNROLL-NOSIMPLIFY: pred.store.if:
609-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32
610-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
611-
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1
597+
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
612598
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
613599
; UNROLL-NOSIMPLIFY: pred.store.continue:
614600
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
615601
; UNROLL-NOSIMPLIFY: pred.store.if2:
616-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32
617-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8
618-
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1
602+
; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
619603
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
620604
; UNROLL-NOSIMPLIFY: pred.store.continue3:
621605
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
622-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
623-
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
606+
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
607+
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
624608
; UNROLL-NOSIMPLIFY: middle.block:
625609
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
626610
; UNROLL-NOSIMPLIFY: scalar.ph:

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
2929
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
3030

3131
; CHECK: pred.store.if:
32-
; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3>
33-
; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4>
34-
; CHECK-NEXT: CLONE store ir<%tmp5>, ir<%tmp2>
32+
; CHECK-NEXT: CLONE store ir<%tmp3>, ir<%tmp2>
3533
; CHECK-NEXT: Successor(s): pred.store.continue
3634

3735
; CHECK: pred.store.continue:

0 commit comments

Comments
 (0)