Skip to content

Commit 140e80a

Browse files
authored
[TTI] Add cost model support for [u|s]cmp (#106824)
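This patch adds cost model support for the [u|s]cmp intrinsics (llvm.ucmp and llvm.scmp). The default cost is two integer compares plus either two selects or two zero-extends and a subtract, depending on whether the target prefers expanding the comparison using selects.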
1 parent 38dbcbd commit 140e80a

File tree

6 files changed

+1721
-1448
lines changed

6 files changed

+1721
-1448
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2196,6 +2196,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2196 2196
case Intrinsic::bitreverse:
2197 2197
ISD = ISD::BITREVERSE;
2198 2198
break;
2199+
case Intrinsic::ucmp:
2200+
ISD = ISD::UCMP;
2201+
break;
2202+
case Intrinsic::scmp:
2203+
ISD = ISD::SCMP;
2204+
break;
2199 2205
}
2200 2206

2201 2207
auto *ST = dyn_cast<StructType>(RetTy);
@@ -2433,6 +2439,33 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2433 2439
}
2434 2440
return Cost;
2435 2441
}
2442+
case Intrinsic::ucmp:
2443+
case Intrinsic::scmp: {
2444+
Type *CmpTy = Tys[0];
2445+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2446+
InstructionCost Cost =
2447+
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
2448+
CmpIntrinsic::getGTPredicate(IID),
2449+
CostKind) +
2450+
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
2451+
CmpIntrinsic::getLTPredicate(IID),
2452+
CostKind);
2453+
2454+
if (TLI->shouldExpandCmpUsingSelects()) {
2455+
// x < y ? -1 : (x > y ? 1 : 0)
2456+
Cost += 2 * thisT()->getCmpSelInstrCost(
2457+
BinaryOperator::Select, RetTy, CondTy,
2458+
ICmpInst::BAD_ICMP_PREDICATE, CostKind);
2459+
} else {
2460+
// zext(x > y) - zext(x < y)
2461+
Cost +=
2462+
2 * thisT()->getCastInstrCost(CastInst::ZExt, RetTy, CondTy,
2463+
TTI::CastContextHint::None, CostKind);
2464+
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
2465+
CostKind);
2466+
}
2467+
return Cost;
2468+
}
2436 2469
default:
2437 2470
break;
2438 2471
}

llvm/test/Analysis/CostModel/AArch64/cmp.ll

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,3 +121,55 @@ define void @andcmp() {
121 121
%c64sle = icmp sle i64 %a64, 0
122 122
ret void
123 123
}
124+
125+
define void @uscmp() {
126+
; CHECK-THROUGHPUT-LABEL: 'uscmp'
127+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u8 = call i8 @llvm.ucmp.i8.i8(i8 undef, i8 undef)
128+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u16 = call i16 @llvm.ucmp.i16.i16(i16 undef, i16 undef)
129+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u32 = call i32 @llvm.ucmp.i32.i32(i32 undef, i32 undef)
130+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u64 = call i64 @llvm.ucmp.i64.i64(i64 undef, i64 undef)
131+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
132+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
133+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
134+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8 = call i8 @llvm.scmp.i8.i8(i8 undef, i8 undef)
135+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16 = call i16 @llvm.scmp.i16.i16(i16 undef, i16 undef)
136+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s32 = call i32 @llvm.scmp.i32.i32(i32 undef, i32 undef)
137+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s64 = call i64 @llvm.scmp.i64.i64(i64 undef, i64 undef)
138+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
139+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
140+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
141+
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
142+
;
143+
; CHECK-SIZE-LABEL: 'uscmp'
144+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u8 = call i8 @llvm.ucmp.i8.i8(i8 undef, i8 undef)
145+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u16 = call i16 @llvm.ucmp.i16.i16(i16 undef, i16 undef)
146+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u32 = call i32 @llvm.ucmp.i32.i32(i32 undef, i32 undef)
147+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u64 = call i64 @llvm.ucmp.i64.i64(i64 undef, i64 undef)
148+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
149+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
150+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
151+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8 = call i8 @llvm.scmp.i8.i8(i8 undef, i8 undef)
152+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16 = call i16 @llvm.scmp.i16.i16(i16 undef, i16 undef)
153+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s32 = call i32 @llvm.scmp.i32.i32(i32 undef, i32 undef)
154+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s64 = call i64 @llvm.scmp.i64.i64(i64 undef, i64 undef)
155+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
156+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
157+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
158+
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
159+
;
160+
%u8 = call i8 @llvm.ucmp(i8 undef, i8 undef)
161+
%u16 = call i16 @llvm.ucmp(i16 undef, i16 undef)
162+
%u32 = call i32 @llvm.ucmp(i32 undef, i32 undef)
163+
%u64 = call i64 @llvm.ucmp(i64 undef, i64 undef)
164+
%uv16i8 = call <16 x i8> @llvm.ucmp(<16 x i8> undef, <16 x i8> undef)
165+
%uv8i16 = call <8 x i16> @llvm.ucmp(<8 x i16> undef, <8 x i16> undef)
166+
%uv4i32 = call <4 x i32> @llvm.ucmp(<4 x i32> undef, <4 x i32> undef)
167+
%s8 = call i8 @llvm.scmp(i8 undef, i8 undef)
168+
%s16 = call i16 @llvm.scmp(i16 undef, i16 undef)
169+
%s32 = call i32 @llvm.scmp(i32 undef, i32 undef)
170+
%s64 = call i64 @llvm.scmp(i64 undef, i64 undef)
171+
%sv16i8 = call <16 x i8> @llvm.scmp(<16 x i8> undef, <16 x i8> undef)
172+
%sv8i16 = call <8 x i16> @llvm.scmp(<8 x i16> undef, <8 x i16> undef)
173+
%sv4i32 = call <4 x i32> @llvm.scmp(<4 x i32> undef, <4 x i32> undef)
174+
ret void
175+
}

0 commit comments

Comments
 (0)