Skip to content

Commit 6ba62f4

Browse files
authored
[AArch64][SME2] Refine fcvtu/fcvts/scvtf/ucvtf (#77947)
Rename the fcvtu intrinsics to fcvtzu and the fcvts intrinsics to fcvtzs. Use llvm_anyvector_ty for both the multi-vector return values and the operands, so that the return and operand types can both be specified in the intrinsic call, e.g. @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32
1 parent 0d8e333 commit 6ba62f4

File tree

8 files changed

+111
-121
lines changed

8 files changed

+111
-121
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,9 @@ namespace clang {
291291
bool isZExtReturn() const { return Flags & IsZExtReturn; }
292292
bool isByteIndexed() const { return Flags & IsByteIndexed; }
293293
bool isOverloadNone() const { return Flags & IsOverloadNone; }
294-
bool isOverloadWhile() const { return Flags & IsOverloadWhile; }
294+
bool isOverloadWhileOrMultiVecCvt() const {
295+
return Flags & IsOverloadWhileOrMultiVecCvt;
296+
}
295297
bool isOverloadDefault() const { return !(Flags & OverloadKindMask); }
296298
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
297299
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -749,14 +749,14 @@ def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone,
749749
////////////////////////////////////////////////////////////////////////////////
750750
// While comparisons
751751

752-
def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>;
753-
def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>;
754-
def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>;
755-
def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>;
756-
def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>;
757-
def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>;
758-
def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>;
759-
def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>;
752+
def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
753+
def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
754+
def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
755+
def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
756+
def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
757+
def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
758+
def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
759+
def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
760760

761761
////////////////////////////////////////////////////////////////////////////////
762762
// Counting bit
@@ -1336,14 +1336,14 @@ let TargetGuard = "sve2p1|sme2" in {
13361336
////////////////////////////////////////////////////////////////////////////////
13371337
// SVE2 WhileGE/GT
13381338
let TargetGuard = "sve2" in {
1339-
def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>;
1340-
def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>;
1341-
def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>;
1342-
def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>;
1343-
def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>;
1344-
def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>;
1345-
def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>;
1346-
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>;
1339+
def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1340+
def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1341+
def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1342+
def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1343+
def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1344+
def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1345+
def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
1346+
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
13471347
}
13481348

13491349
let TargetGuard = "sve2p1|sme2" in {
@@ -2244,15 +2244,15 @@ let TargetGuard = "sme2" in {
22442244
def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>;
22452245
def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>;
22462246

2247-
def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming], []>;
2248-
def SVCVT_U32_F32_X2 : SInst<"svcvt_u32[_{d}_x2]", "2.u2.d", "f", MergeNone, "aarch64_sve_fcvtu_x2", [IsStreaming], []>;
2249-
def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming], []>;
2250-
def SVCVT_S32_F32_X2 : SInst<"svcvt_s32[_{d}_x2]", "2.x2.d", "f", MergeNone, "aarch64_sve_fcvts_x2", [IsStreaming], []>;
2247+
def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
2248+
def SVCVT_U32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
2249+
def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
2250+
def SVCVT_S32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "i", MergeNone, "aarch64_sve_fcvtzs_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
22512251

2252-
def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming], []>;
2253-
def SVCVT_U32_F32_X4 : SInst<"svcvt_u32[_{d}_x4]", "4.u4.d", "f", MergeNone, "aarch64_sve_fcvtu_x4", [IsStreaming], []>;
2254-
def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming], []>;
2255-
def SVCVT_S32_F32_X4 : SInst<"svcvt_s32[_{d}_x4]", "4.x4.d", "f", MergeNone, "aarch64_sve_fcvts_x4", [IsStreaming], []>;
2252+
def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
2253+
def SVCVT_U32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
2254+
def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
2255+
def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
22562256
}
22572257

22582258
//

clang/include/clang/Basic/arm_sve_sme_incl.td

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -194,41 +194,41 @@ def FirstSplatOperand : FlagType<0x00000400>;
194194
// These flags are used to specify which scalar operand
195195
// needs to be duplicated/splatted into a vector.
196196
// : :
197-
def SplatOperandMask : FlagType<0x00001C00>;
198-
def IsLoad : FlagType<0x00002000>;
199-
def IsStore : FlagType<0x00004000>;
200-
def IsGatherLoad : FlagType<0x00008000>;
201-
def IsScatterStore : FlagType<0x00010000>;
202-
def IsStructLoad : FlagType<0x00020000>;
203-
def IsStructStore : FlagType<0x00040000>;
204-
def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default
205-
def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
206-
def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
207-
def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
208-
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
209-
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
210-
def IsByteIndexed : FlagType<0x01000000>;
211-
def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
212-
def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
213-
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
214-
def IsGatherPrefetch : FlagType<0x10000000>;
215-
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
216-
def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
217-
def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
218-
def IsTupleCreate : FlagType<0x100000000>;
219-
def IsTupleGet : FlagType<0x200000000>;
220-
def IsTupleSet : FlagType<0x400000000>;
221-
def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
222-
def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
223-
def IsStreaming : FlagType<0x2000000000>;
224-
def IsStreamingCompatible : FlagType<0x4000000000>;
225-
def IsReadZA : FlagType<0x8000000000>;
226-
def IsWriteZA : FlagType<0x10000000000>;
227-
def IsReductionQV : FlagType<0x20000000000>;
228-
def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1.
229-
def IsInZA : FlagType<0x80000000000>;
230-
def IsOutZA : FlagType<0x100000000000>;
231-
def IsInOutZA : FlagType<0x200000000000>;
197+
def SplatOperandMask : FlagType<0x00001C00>;
198+
def IsLoad : FlagType<0x00002000>;
199+
def IsStore : FlagType<0x00004000>;
200+
def IsGatherLoad : FlagType<0x00008000>;
201+
def IsScatterStore : FlagType<0x00010000>;
202+
def IsStructLoad : FlagType<0x00020000>;
203+
def IsStructStore : FlagType<0x00040000>;
204+
def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extended by default
205+
def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
206+
def IsOverloadWhileOrMultiVecCvt : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
207+
def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
208+
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
209+
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
210+
def IsByteIndexed : FlagType<0x01000000>;
211+
def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
212+
def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
213+
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
214+
def IsGatherPrefetch : FlagType<0x10000000>;
215+
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
216+
def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
217+
def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
218+
def IsTupleCreate : FlagType<0x100000000>;
219+
def IsTupleGet : FlagType<0x200000000>;
220+
def IsTupleSet : FlagType<0x400000000>;
221+
def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
222+
def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
223+
def IsStreaming : FlagType<0x2000000000>;
224+
def IsStreamingCompatible : FlagType<0x4000000000>;
225+
def IsReadZA : FlagType<0x8000000000>;
226+
def IsWriteZA : FlagType<0x10000000000>;
227+
def IsReductionQV : FlagType<0x20000000000>;
228+
def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1.
229+
def IsInZA : FlagType<0x80000000000>;
230+
def IsOutZA : FlagType<0x100000000000>;
231+
def IsInOutZA : FlagType<0x200000000000>;
232232

233233
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
234234
class ImmCheckType<int val> {

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10056,7 +10056,7 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
1005610056

1005710057
llvm::Type *DefaultType = getSVEType(TypeFlags);
1005810058

10059-
if (TypeFlags.isOverloadWhile())
10059+
if (TypeFlags.isOverloadWhileOrMultiVecCvt())
1006010060
return {DefaultType, Ops[1]->getType()};
1006110061

1006210062
if (TypeFlags.isOverloadWhileRW())

0 commit comments

Comments
 (0)