-
Notifications
You must be signed in to change notification settings - Fork 13.5k
release/20.x: Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) #126487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@preames What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) ChangesBackport 5921295 Requested by: @RKSimon Patch is 20.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126487.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2532edc5d86990e..19963e780ebd350 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12258,12 +12258,18 @@ InstructionCost BoUpSLP::getSpillCost() const {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
if (II->isAssumeLikeIntrinsic())
return true;
- IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II);
+ FastMathFlags FMF;
+ SmallVector<Type *, 4> Tys;
+ for (auto &ArgOp : II->args())
+ Tys.push_back(ArgOp->getType());
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+ IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
+ FMF);
InstructionCost IntrCost =
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
- InstructionCost CallCost =
- TTI->getCallInstrCost(nullptr, II->getType(), ICA.getArgTypes(),
- TTI::TCK_RecipThroughput);
+ InstructionCost CallCost = TTI->getCallInstrCost(
+ nullptr, II->getType(), Tys, TTI::TCK_RecipThroughput);
if (IntrCost < CallCost)
return true;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5ad676537f9c457..9ce79e5ea356b98 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -684,27 +684,27 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]]
-; CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[MUL]], 2
+; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4
+; CHECK-NEXT: [[ADD14:%.*]] = or disjoint i32 [[MUL]], 1
; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
; CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3
; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4
; CHECK-NEXT: [[ADD26:%.*]] = add nsw i32 [[MUL21]], 1
; CHECK-NEXT: [[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64
-; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]]
+; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]]
; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i8, ptr [[Y:%.*]], i64 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
-; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]]
+; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]]
; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4
-; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
+; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
; CHECK-NEXT: [[ARRAYIDX72:%.*]] = getelementptr inbounds nuw i8, ptr [[Z:%.*]], i64 4
; CHECK-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP3]], [[TMP0]]
; CHECK-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 24
@@ -715,22 +715,25 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <2 x i32> [[TMP8]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP9]], [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[ARRAYIDX84:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 28
; CHECK-NEXT: [[MUL81:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX82:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 32
+; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX52]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP14]], [[TMP13]]
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 44
+; CHECK-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 36
; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[ARRAYIDX28]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4
-; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[ARRAYIDX49]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[ARRAYIDX65]], align 4
; CHECK-NEXT: store i32 [[MUL73]], ptr [[Z]], align 4
; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[ARRAYIDX72]], align 4
-; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX76]], align 4
+; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX82]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX76]], align 4
; CHECK-NEXT: store i32 [[MUL87]], ptr [[ARRAYIDX88]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = mul nsw <2 x i32> [[TMP15]], [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = mul nsw <2 x i32> [[TMP16]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP21]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: store <4 x i32> [[TMP19]], ptr [[ARRAYIDX84]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> [[TMP18]], [[TMP17]]
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: store <2 x i32> [[TMP20]], ptr [[ARRAYIDX92]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
index 6576cbe075b740b..00a4417ba7aff2a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
@@ -716,29 +716,29 @@ define float @reduce_float_case3(ptr %a) {
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
-; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[A]], align 4
-; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP1]], align 4
-; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP3]], align 4
-; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP4]], align 4
-; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP5]], align 4
-; CHECK-NEXT: [[LOAD8:%.*]] = load float, ptr [[GEP6]], align 4
-; CHECK-NEXT: [[LOAD9:%.*]] = load float, ptr [[GEP7]], align 4
+; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
+; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
+; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
+; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
+; CHECK-NEXT: [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]])
+; CHECK-NEXT: [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT: [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT: [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT: [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT: [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT: [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT: [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]])
-; CHECK-NEXT: [[LOG8:%.*]] = call float @llvm.log.f32(float [[LOAD8]])
-; CHECK-NEXT: [[LOG9:%.*]] = call float @llvm.log.f32(float [[LOAD9]])
-; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOG2]], [[LOG3]]
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
-; CHECK-NEXT: [[ADD8:%.*]] = fadd float [[ADD6]], [[LOG7]]
-; CHECK-NEXT: [[ADD9:%.*]] = fadd float [[ADD8]], [[LOG8]]
-; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD9]], [[LOG9]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
; CHECK-NEXT: ret float [[ADD7]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
index a3be8f5e935c9e3..46c6c10125b95ff 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
@@ -358,12 +358,12 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
-; GFX8-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
-; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
+; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
-; GFX8-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX8-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[INS_1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
;
; GFX9-LABEL: @uadd_sat_v4i16(
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index 11fa3337544a1ae..257e4660c80aabb 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -28,9 +28,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
+; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32>
+; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32>
@@ -46,6 +50,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]]
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
+; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP7]] to i32
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32>
; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]]
@@ -59,8 +64,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]]
; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]]
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0
-; CHECK-NEXT: [[CONV_2:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
-; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2]], [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
+; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP45]], [[TMP43]]
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1
; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]]
@@ -115,7 +120,15 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]]
; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]]
; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]]
-; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15
+; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP77]], 15
+; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
+; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
+; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP45]], 15
+; CHECK-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537
+; CHECK-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535
+; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]]
+; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]]
+; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV9_2]], 15
; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535
; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15
@@ -231,10 +244,10 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]]
; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]]
; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]]
-; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1]], [[ADD103]]
+; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]]
; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]]
-; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1]], [[ADD105]]
-; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2]]
+; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]]
+; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP45]]
; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]]
; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]]
; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]]
@@ -242,42 +255,21 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]]
; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]]
; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]]
-; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32>
-; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0
-; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0
-; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]]
-; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]]
-; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15)
-; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537)
-; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535)
-; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0
-; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0
-; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]]
-; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]]
-; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1
-; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1
+; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD55_1]], [[ADD55]]
+; CHECK-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]]
+; CHECK-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]]
; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]]
-; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]]
-; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0
-; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0
-; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]]
; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]]
-; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]]
-; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]]
+; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]]
+; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[CONV9_2]]
+; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]]
+; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[CONV_2]]
;...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
…o improve cost analysis." (llvm#124962) Reverts llvm#124129 as its currently causing a regression at llvm#124499 - avoids the regression until a proper fix can be added to getSpillCost (cherry picked from commit 5921295)
@RKSimon (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 5921295
Requested by: @RKSimon