-
Notifications
You must be signed in to change notification settings - Fork 13.5k
Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." #124962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…o improv…" This reverts commit d8cd8d5.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-vectorizers Author: Simon Pilgrim (RKSimon) Changes: Reverts llvm/llvm-project#124129, as it's currently causing a regression at #124499. This avoids the regression until a proper fix can be added to getSpillCost. Patch is 20.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124962.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4204f35d1a20d6..5c02bc7bfa90aa 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12234,12 +12234,18 @@ InstructionCost BoUpSLP::getSpillCost() const {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
if (II->isAssumeLikeIntrinsic())
return true;
- IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II);
+ FastMathFlags FMF;
+ SmallVector<Type *, 4> Tys;
+ for (auto &ArgOp : II->args())
+ Tys.push_back(ArgOp->getType());
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+ IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
+ FMF);
InstructionCost IntrCost =
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
- InstructionCost CallCost =
- TTI->getCallInstrCost(nullptr, II->getType(), ICA.getArgTypes(),
- TTI::TCK_RecipThroughput);
+ InstructionCost CallCost = TTI->getCallInstrCost(
+ nullptr, II->getType(), Tys, TTI::TCK_RecipThroughput);
if (IntrCost < CallCost)
return true;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5ad676537f9c45..9ce79e5ea356b9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -684,27 +684,27 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]]
-; CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[MUL]], 2
+; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4
+; CHECK-NEXT: [[ADD14:%.*]] = or disjoint i32 [[MUL]], 1
; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
; CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3
; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4
; CHECK-NEXT: [[ADD26:%.*]] = add nsw i32 [[MUL21]], 1
; CHECK-NEXT: [[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64
-; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]]
+; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]]
; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i8, ptr [[Y:%.*]], i64 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
-; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]]
+; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]]
; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4
-; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
+; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
; CHECK-NEXT: [[ARRAYIDX72:%.*]] = getelementptr inbounds nuw i8, ptr [[Z:%.*]], i64 4
; CHECK-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP3]], [[TMP0]]
; CHECK-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 24
@@ -715,22 +715,25 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <2 x i32> [[TMP8]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP9]], [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[ARRAYIDX84:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 28
; CHECK-NEXT: [[MUL81:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX82:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 32
+; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX52]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP14]], [[TMP13]]
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 44
+; CHECK-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 36
; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[ARRAYIDX28]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4
-; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[ARRAYIDX49]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[ARRAYIDX65]], align 4
; CHECK-NEXT: store i32 [[MUL73]], ptr [[Z]], align 4
; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[ARRAYIDX72]], align 4
-; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX76]], align 4
+; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX82]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX76]], align 4
; CHECK-NEXT: store i32 [[MUL87]], ptr [[ARRAYIDX88]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = mul nsw <2 x i32> [[TMP15]], [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = mul nsw <2 x i32> [[TMP16]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP21]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: store <4 x i32> [[TMP19]], ptr [[ARRAYIDX84]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> [[TMP18]], [[TMP17]]
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: store <2 x i32> [[TMP20]], ptr [[ARRAYIDX92]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
index 6576cbe075b740..00a4417ba7aff2 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
@@ -716,29 +716,29 @@ define float @reduce_float_case3(ptr %a) {
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
-; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[A]], align 4
-; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP1]], align 4
-; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP3]], align 4
-; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP4]], align 4
-; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP5]], align 4
-; CHECK-NEXT: [[LOAD8:%.*]] = load float, ptr [[GEP6]], align 4
-; CHECK-NEXT: [[LOAD9:%.*]] = load float, ptr [[GEP7]], align 4
+; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
+; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
+; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
+; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
+; CHECK-NEXT: [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]])
+; CHECK-NEXT: [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT: [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT: [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT: [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT: [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT: [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT: [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]])
-; CHECK-NEXT: [[LOG8:%.*]] = call float @llvm.log.f32(float [[LOAD8]])
-; CHECK-NEXT: [[LOG9:%.*]] = call float @llvm.log.f32(float [[LOAD9]])
-; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOG2]], [[LOG3]]
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
-; CHECK-NEXT: [[ADD8:%.*]] = fadd float [[ADD6]], [[LOG7]]
-; CHECK-NEXT: [[ADD9:%.*]] = fadd float [[ADD8]], [[LOG8]]
-; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD9]], [[LOG9]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
; CHECK-NEXT: ret float [[ADD7]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
index a3be8f5e935c9e..46c6c10125b95f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
@@ -358,12 +358,12 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
-; GFX8-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
-; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
+; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
-; GFX8-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX8-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[INS_1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
;
; GFX9-LABEL: @uadd_sat_v4i16(
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index 11fa3337544a1a..257e4660c80aab 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -28,9 +28,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
+; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32>
+; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32>
@@ -46,6 +50,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]]
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
+; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP7]] to i32
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32>
; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]]
@@ -59,8 +64,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]]
; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]]
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0
-; CHECK-NEXT: [[CONV_2:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
-; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2]], [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
+; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP45]], [[TMP43]]
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1
; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]]
@@ -115,7 +120,15 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]]
; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]]
; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]]
-; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15
+; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP77]], 15
+; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
+; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
+; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP45]], 15
+; CHECK-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537
+; CHECK-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535
+; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]]
+; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]]
+; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV9_2]], 15
; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535
; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15
@@ -231,10 +244,10 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]]
; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]]
; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]]
-; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1]], [[ADD103]]
+; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]]
; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]]
-; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1]], [[ADD105]]
-; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2]]
+; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]]
+; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP45]]
; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]]
; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]]
; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]]
@@ -242,42 +255,21 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]]
; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]]
; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]]
-; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32>
-; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0
-; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0
-; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]]
-; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]]
-; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15)
-; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537)
-; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535)
-; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0
-; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0
-; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]]
-; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]]
-; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1
-; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1
+; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD55_1]], [[ADD55]]
+; CHECK-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]]
+; CHECK-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]]
; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]]
-; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]]
-; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0
-; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0
-; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]]
; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]]
-; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]]
-; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]]
+; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]]
+; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[CONV9_2]]
+; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]]
+; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[CONV_2]]
; CHECK-NEX...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
/cherry-pick 5921295 |
Error: Command failed due to missing milestone. |
/cherry-pick 5921295 |
/pull-request #126487 |
…o improve cost analysis." (llvm#124962) Reverts llvm#124129 as its currently causing a regression at llvm#124499 - avoids the regression until a proper fix can be added to getSpillCost (cherry picked from commit 5921295)
Reverts #124129, as it's currently causing a regression at #124499. This avoids the regression until a proper fix can be added to getSpillCost.