Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit 2baaf7c

Browse files
SC llvm team
authored and committed
Merged main:a6dabed3483c into amd-gfx:adee0826382f
Local branch amd-gfx adee082 Merged main:75b3c3d267bf into amd-gfx:d648e114f351 Remote branch main a6dabed [AMDGPU] Fix nondeterminism in SIFixSGPRCopies (llvm#70644)
2 parents adee082 + a6dabed commit 2baaf7c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1394
-38
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,20 +1291,20 @@ class TargetInfo : public TransferrableTargetInfo,
12911291
fillValidCPUList(Values);
12921292
}
12931293

1294-
/// brief Determine whether this TargetInfo supports the given CPU name.
1294+
/// Determine whether this TargetInfo supports the given CPU name.
12951295
virtual bool isValidCPUName(StringRef Name) const {
12961296
return true;
12971297
}
12981298

1299-
/// brief Determine whether this TargetInfo supports the given CPU name for
1300-
// tuning.
1299+
/// Determine whether this TargetInfo supports the given CPU name for
1300+
/// tuning.
13011301
virtual bool isValidTuneCPUName(StringRef Name) const {
13021302
return isValidCPUName(Name);
13031303
}
13041304

13051305
virtual ParsedTargetAttr parseTargetAttr(StringRef Str) const;
13061306

1307-
/// brief Determine whether this TargetInfo supports tune in target attribute.
1307+
/// Determine whether this TargetInfo supports tune in target attribute.
13081308
virtual bool supportsTargetAttributeTune() const {
13091309
return false;
13101310
}

clang/include/clang/Parse/Parser.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -663,9 +663,9 @@ class Parser : public CodeCompletionHandler {
663663
return PrevTokLocation;
664664
}
665665

666-
///\ brief When we are consuming a code-completion token without having
667-
/// matched specific position in the grammar, provide code-completion results
668-
/// based on context.
666+
/// When we are consuming a code-completion token without having matched
667+
/// specific position in the grammar, provide code-completion results based
668+
/// on context.
669669
///
670670
/// \returns the source location of the code-completion token.
671671
SourceLocation handleUnexpectedCodeCompletionToken();

llvm/docs/LangRef.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18497,6 +18497,45 @@ Arguments:
1849718497
Both arguments must be vectors of the same type whereby their logical
1849818498
concatenation matches the result type.
1849918499

18500+
'``llvm.experimental.cttz.elts``' Intrinsic
18501+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18502+
18503+
Syntax:
18504+
"""""""
18505+
18506+
This is an overloaded intrinsic. You can use ``llvm.experimental.cttz.elts``
18507+
on any vector of integer elements, both fixed width and scalable.
18508+
18509+
::
18510+
18511+
declare i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> <src>, i1 <is_zero_poison>)
18512+
18513+
Overview:
18514+
"""""""""
18515+
18516+
The '``llvm.experimental.cttz.elts``' intrinsic counts the number of trailing
18517+
zero elements of a vector.
18518+
18519+
Arguments:
18520+
""""""""""
18521+
18522+
The first argument is the vector to be counted. This argument must be a vector
18523+
with integer element type. The return type must also be an integer type which is
18524+
wide enough to hold the maximum number of elements of the source vector. The
18525+
behaviour of this intrinsic is undefined if the return type is not wide enough
18526+
for the number of elements in the input vector.
18527+
18528+
The second argument is a constant flag that indicates whether the intrinsic
18529+
returns a valid result if the first argument is all zero. If the first argument
18530+
is all zero and the second argument is true, the result is poison.
18531+
18532+
Semantics:
18533+
""""""""""
18534+
18535+
The '``llvm.experimental.cttz.elts``' intrinsic counts the trailing (least
18536+
significant) zero elements in a vector. If ``src == 0`` the result is the
18537+
number of elements in the input vector.
18538+
1850018539
'``llvm.experimental.vector.splice``' Intrinsic
1850118540
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1850218541

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);
259259

260260
/// Return the cost associated with a callsite, including parameter passing
261261
/// and the call/return instruction.
262-
int getCallsiteCost(const CallBase &Call, const DataLayout &DL);
262+
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
263+
const DataLayout &DL);
263264

264265
/// Get an InlineCost object representing the cost of inlining this
265266
/// callsite.

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,6 +1517,15 @@ class TargetTransformInfo {
15171517
bool areInlineCompatible(const Function *Caller,
15181518
const Function *Callee) const;
15191519

1520+
/// Returns a penalty for invoking call \p Call in \p F.
1521+
/// For example, if a function F calls a function G, which in turn calls
1522+
/// function H, then getInlineCallPenalty(F, H()) would return the
1523+
/// penalty of calling H from F, e.g. after inlining G into F.
1524+
/// \p DefaultCallPenalty is passed to give a default penalty that
1525+
/// the target can amend or override.
1526+
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1527+
unsigned DefaultCallPenalty) const;
1528+
15201529
/// \returns True if the caller and callee agree on how \p Types will be
15211530
/// passed to or returned from the callee.
15221531
/// to the callee.
@@ -2012,6 +2021,8 @@ class TargetTransformInfo::Concept {
20122021
std::optional<uint32_t> AtomicCpySize) const = 0;
20132022
virtual bool areInlineCompatible(const Function *Caller,
20142023
const Function *Callee) const = 0;
2024+
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2025+
unsigned DefaultCallPenalty) const = 0;
20152026
virtual bool areTypesABICompatible(const Function *Caller,
20162027
const Function *Callee,
20172028
const ArrayRef<Type *> &Types) const = 0;
@@ -2673,6 +2684,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
26732684
const Function *Callee) const override {
26742685
return Impl.areInlineCompatible(Caller, Callee);
26752686
}
2687+
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2688+
unsigned DefaultCallPenalty) const override {
2689+
return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2690+
}
26762691
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
26772692
const ArrayRef<Type *> &Types) const override {
26782693
return Impl.areTypesABICompatible(Caller, Callee, Types);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,11 @@ class TargetTransformInfoImplBase {
802802
Callee->getFnAttribute("target-features"));
803803
}
804804

805+
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
806+
unsigned DefaultCallPenalty) const {
807+
return DefaultCallPenalty;
808+
}
809+
805810
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
806811
const ArrayRef<Type *> &Types) const {
807812
return (Caller->getFnAttribute("target-cpu") ==

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,10 @@ class TargetLoweringBase {
465465
return true;
466466
}
467467

468+
/// Return true if the @llvm.experimental.cttz.elts intrinsic should be
469+
/// expanded using generic code in SelectionDAGBuilder.
470+
virtual bool shouldExpandCttzElements(EVT VT) const { return true; }
471+
468472
// Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
469473
// vecreduce(op(x, y)) for the reduction opcode RedOpc.
470474
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 478915
19+
#define LLVM_MAIN_REVISION 479203
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,6 +2182,11 @@ def int_experimental_get_vector_length:
21822182
[IntrNoMem, IntrNoSync, IntrWillReturn,
21832183
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
21842184

2185+
def int_experimental_cttz_elts:
2186+
DefaultAttrsIntrinsic<[llvm_anyint_ty],
2187+
[llvm_anyvector_ty, llvm_i1_ty],
2188+
[IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
2189+
21852190
def int_experimental_vp_splice:
21862191
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
21872192
[LLVMMatchType<0>,

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,6 +1953,11 @@ def int_amdgcn_inverse_ballot :
19531953
Intrinsic<[llvm_i1_ty], [llvm_anyint_ty],
19541954
[IntrNoMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
19551955

1956+
// Lowers to S_BITREPLICATE_B64_B32.
1957+
// The argument must be uniform; otherwise, the result is undefined.
1958+
def int_amdgcn_s_bitreplicate :
1959+
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
1960+
19561961
class AMDGPUWaveReduce<LLVMType data_ty = llvm_anyint_ty> : Intrinsic<
19571962
[data_ty],
19581963
[

0 commit comments

Comments
 (0)