diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 73fd521aeeec3..28baa2e45e423 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -103,6 +103,9 @@ clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs SOURCE arm_sme.td TARGET ClangARMSmeStreamingAttrs) +clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltinsZAState) clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def SOURCE arm_cde.td TARGET ClangARMCdeBuiltinsDef) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c8e32a63684f2..0d8da213088d9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3155,6 +3155,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error< def warn_attribute_arm_sm_incompat_builtin : Warning< "builtin call has undefined behaviour when called from a %0 function">, InGroup>; +def warn_attribute_arm_za_builtin_no_za_state : Warning< + "builtin call is not valid when calling from a function without active ZA state">, + InGroup>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 49be88051e654..42e29e4309378 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3174,6 +3174,25 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, } } +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr()) + return true; + if (const auto *T = FD->getType()->getAs()) + if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: + return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (const FunctionDecl *FD = getCurFunctionDecl()) { std::optional BuiltinType; @@ -3186,6 +3205,11 @@ bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinType) checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + + if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_za_builtin_no_za_state) + << TheCall->getSourceRange(); } // Range check SME intrinsics that take immediate values. diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c index ee6c1c9dd566b..55d2e355897f7 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn); } @@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn); } @@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn); } @@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn); } @@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn); } @@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn); } @@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn); } @@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c index 254ea89d22c50..8e9c2e7da46a3 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn); } @@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn); } @@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn); } @@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn); } @@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn); } @@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn); } @@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn); } @@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c index 5622568c4cd76..e17782db222b6 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -22,7 +22,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za8(0, slice_base, pg, ptr); svld1_hor_za8(0, slice_base + 15, pg, ptr); } @@ -45,7 +45,7 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za16(0, slice_base, pg, ptr); svld1_hor_za16(1, slice_base + 7, pg, ptr); } @@ -68,7 +68,7 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za32(0, slice_base, pg, ptr); svld1_hor_za32(3, slice_base + 3, pg, ptr); } @@ -91,7 +91,7 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za64(0, slice_base, pg, ptr); svld1_hor_za64(7, slice_base + 1, pg, ptr); } @@ -112,7 +112,7 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za128(0, slice_base, pg, ptr); svld1_hor_za128(15, slice_base, pg, ptr); } @@ -133,7 +133,7 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za8(0, slice_base, pg, ptr); svld1_ver_za8(0, slice_base + 15, pg, ptr); } @@ -156,7 +156,7 @@ void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za16(0, slice_base, pg, ptr); svld1_ver_za16(1, slice_base + 7, pg, ptr); } @@ -179,7 +179,7 @@ void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za32(0, slice_base, pg, ptr); svld1_ver_za32(3, slice_base + 3, pg, ptr); } @@ -202,7 +202,7 @@ void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za64(0, slice_base, pg, ptr); svld1_ver_za64(7, slice_base + 1, pg, ptr); } @@ -223,7 +223,7 @@ void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za128(0, slice_base, pg, ptr); svld1_ver_za128(15, slice_base, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c index 0fe7dcfc0a799..0fa77e1144a7d 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -28,7 +28,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -57,7 +57,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -86,7 +86,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -115,7 +115,7 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -142,7 +142,7 @@ void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum); } @@ -169,7 +169,7 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -198,7 +198,7 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -227,7 +227,7 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -256,7 +256,7 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -283,7 +283,7 @@ void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index 9af0778e89c5e..314c9645dd4f7 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -12,7 +12,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) { +void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_shared_za { svldr_vnum_za(slice_base, ptr, 0); } @@ -22,7 +22,7 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15) // CHECK-NEXT: ret void // -void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) { +void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_shared_za { svldr_vnum_za(slice_base, ptr, 15); } @@ -32,7 +32,7 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svldr_za(uint32_t slice_base, const void *ptr) { +void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_shared_za { svldr_za(slice_base, ptr); } @@ -43,7 +43,7 @@ void test_svldr_za(uint32_t slice_base, const void *ptr) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]]) // CHECK-NEXT: ret void // -void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) { +void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_shared_za { svldr_vnum_za(slice_base, ptr, vnum); } @@ -53,6 +53,6 @@ void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16) // CHECK-NEXT: ret void // -void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) { +void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_shared_za { svldr_vnum_za(slice_base, ptr, 16); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c index b90c9be4a6e09..e84f31c2dfa92 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming { +void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming { +void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm); } @@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming { +void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm); } @@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming { +void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm); } @@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming { +void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm); } @@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming { +void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming { +void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c index a56ce4d17f126..1b22eb64e9e36 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming { +void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm); } @@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming { +void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm); } @@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming { +void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm); } @@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming { +void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm); } @@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming { +void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c index abcf4c2e698d7..0ff97ff92f714 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming { +void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming { +void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm); } @@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming { +void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm); } @@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming { +void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm); } @@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming { +void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm); } @@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming { +void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming { +void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c index b26b9e4e51e05..5b190a7f9b748 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming { +void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm); } @@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming { +void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm); } @@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming { +void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm); } @@ -90,7 +90,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming { +void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm); } @@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming { +void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c index a15599d186a87..843a96da90278 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base); } @@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice); } @@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base); } @@ -83,7 +83,7 @@ svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice); } @@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base); } @@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice); } @@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base); } @@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice); } @@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base); } @@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice); } @@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base); } @@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice); } @@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base); } @@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice); } @@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base); } @@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice); } @@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base); } @@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice); } @@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base); } @@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice); } @@ -410,7 +410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base); } @@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice); } @@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base); } @@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice); } @@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base); } @@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base); } @@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base); } @@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base); } @@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base); } @@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base); } @@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base); } @@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base); } @@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base); } @@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base); } @@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base); } @@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base); } @@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base); } @@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base); } @@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base); } @@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base); } @@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base); } @@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base); } @@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base); } @@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base); } @@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base); } @@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base); } @@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base); } @@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base); } @@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base); } @@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice); } @@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base); } @@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice); } @@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base); } @@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice); } @@ -1025,7 +1025,7 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base); } @@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice); } @@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base); } @@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice); } @@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base); } @@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice); } @@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base); } @@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice); } @@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base); } @@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice); } @@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base); } @@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice); } @@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base); } @@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice); } @@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base); } @@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice); } @@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base); } @@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice); } @@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base); } @@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base); } @@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base); } @@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base); } @@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base); } @@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base); } @@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base); } @@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base); } @@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base); } @@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base); } @@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base); } @@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base); } @@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base); } @@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base); } @@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base); } @@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base); } @@ -1652,7 +1652,7 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base); } @@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base); } @@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base); } @@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base); } @@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base); } @@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base); } @@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base); } @@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { +svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c index 9ed158aedf7e3..98ebbefc2e74c 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c @@ -22,7 +22,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_hor_za8(0, slice_base, pg, ptr); svst1_hor_za8(0, slice_base + 15, pg, ptr); } @@ -45,7 +45,7 @@ void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_hor_za16(0, slice_base, pg, ptr); svst1_hor_za16(1, slice_base + 7, pg, ptr); } @@ -68,7 +68,7 @@ void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_hor_za32(0, slice_base, pg, ptr); svst1_hor_za32(3, slice_base + 3, pg, ptr); } @@ -91,7 +91,7 @@ void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_hor_za64(0, slice_base, pg, ptr); svst1_hor_za64(7, slice_base + 1, pg, ptr); } @@ -112,7 +112,7 @@ void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_hor_za128(0, slice_base, pg, ptr); svst1_hor_za128(15, slice_base, pg, ptr); } @@ -133,7 +133,7 @@ void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_str // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za8(0, slice_base, pg, ptr); svst1_ver_za8(0, slice_base + 15, pg, ptr); } @@ -156,7 +156,7 @@ void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za16(0, slice_base, pg, ptr); svst1_ver_za16(1, slice_base + 7, pg, ptr); } @@ -179,7 +179,7 @@ void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za32(0, slice_base, pg, ptr); svst1_ver_za32(3, slice_base + 3, pg, ptr); } @@ -202,7 +202,7 @@ void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za64(0, slice_base, pg, ptr); svst1_ver_za64(7, slice_base + 1, pg, ptr); } @@ -223,7 +223,7 @@ void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming { +void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za128(0, slice_base, pg, ptr); svst1_ver_za128(15, slice_base, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c index 627098d9365bd..938e62a15c771 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c @@ -28,7 +28,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_hor_vnum_za8(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -57,7 +57,7 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_hor_vnum_za16(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -86,7 +86,7 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_hor_vnum_za32(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -115,7 +115,7 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_hor_vnum_za64(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -142,7 +142,7 @@ void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_hor_vnum_za128(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za128(15, slice_base, pg, ptr, vnum); } @@ -169,7 +169,7 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_ver_vnum_za8(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -198,7 +198,7 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_ver_vnum_za16(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -227,7 +227,7 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_ver_vnum_za32(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -256,7 +256,7 @@ void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_ver_vnum_za64(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -283,7 +283,7 @@ void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { +void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svst1_ver_vnum_za128(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za128(15, slice_base, pg, ptr, vnum); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c index baadfc18563a0..bcf368bc8dce4 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c @@ -12,7 +12,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svstr_vnum_za(uint32_t slice_base, void *ptr) { +void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_shared_za { svstr_vnum_za(slice_base, ptr, 0); } @@ -22,7 +22,7 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15) // CHECK-NEXT: ret void // -void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) { +void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_shared_za { svstr_vnum_za(slice_base, ptr, 15); } @@ -32,7 +32,7 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svstr_za(uint32_t slice_base, void *ptr) { +void test_svstr_za(uint32_t slice_base, void *ptr) __arm_shared_za { svstr_za(slice_base, ptr); } @@ -43,7 +43,7 @@ void test_svstr_za(uint32_t slice_base, void *ptr) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]]) // CHECK-NEXT: ret void // -void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) { +void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_shared_za { svstr_vnum_za(slice_base, ptr, vnum); } @@ -53,6 +53,6 @@ void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16) // CHECK-NEXT: ret void // -void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) { +void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_shared_za { svstr_vnum_za(slice_base, ptr, 16); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c index 2cc338add314b..38c8402c3d0fa 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn); } @@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn); } @@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn); } @@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn); } @@ -102,7 +102,7 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn); } @@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn); } @@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn); } @@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn); } @@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn); } @@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn); } @@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn); } @@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn); } @@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn); } @@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn); } @@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn); } @@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn); } @@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn); } @@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn); } @@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn); } @@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn); } @@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn); } @@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn); } @@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn); } @@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn); } @@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn); } @@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn); } @@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn); } @@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn); } @@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn); } @@ -574,7 +574,7 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn); } @@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn); } @@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn); } @@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn); } @@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn); } @@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn); } @@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn); } @@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn); } @@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn); } @@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn); } @@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn); } @@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn); } @@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn); } @@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn); } @@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn); } @@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn); } @@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn); } @@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn); } @@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn); } @@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn); } @@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn); } @@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn); } @@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn); } @@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn); } @@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn); } @@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn); } @@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn); } @@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn); } @@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn); } @@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn); } @@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn); } @@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn); } @@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn); } @@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn); } @@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn); } @@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn); } @@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn); } @@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn); } @@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn); } @@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn); } @@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn); } @@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn); } @@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn); } @@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn); } @@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming { +void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn); } @@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn); } @@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming { +void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn); } @@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn); } @@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming { +void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn); } @@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn); } @@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming { +void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn); } @@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn); } @@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming { +void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn); } @@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn); } @@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming { +void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn); } @@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn); } @@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming { +void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn); } @@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn); } @@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming { +void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn); } @@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn); } @@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn); } @@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn); } @@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming { +void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn); } @@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn); } @@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming { +void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn); } @@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn); } @@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming { +void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c index 750eead7c705e..ddd9602369538 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c @@ -18,7 +18,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 0) // CHECK-CXX-NEXT: ret void // -void test_svzero_mask_za() { +void test_svzero_mask_za(void) __arm_shared_za { svzero_mask_za(0); } @@ -34,7 +34,7 @@ void test_svzero_mask_za() { // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 176) // CHECK-CXX-NEXT: ret void // -void test_svzero_mask_za_1() { +void test_svzero_mask_za_1(void) __arm_shared_za { svzero_mask_za(176); } @@ -50,7 +50,7 @@ void test_svzero_mask_za_1() { // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) // CHECK-CXX-NEXT: ret void // -void test_svzero_mask_za_2() { +void test_svzero_mask_za_2(void) __arm_shared_za { svzero_mask_za(255); } @@ -66,7 +66,7 @@ void test_svzero_mask_za_2() { // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) // CHECK-CXX-NEXT: ret void // -void test_svzero_za() { +void test_svzero_za(void) __arm_shared_za { svzero_za(); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c index 361a9e82a3adb..e63d9f0a84757 100644 --- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c @@ -97,3 +97,8 @@ svbool_t streaming_caller_ptrue(void) __arm_streaming { // expected-no-warning return svand_z(svptrue_b16(), svptrue_pat_b16(SV_ALL), svptrue_pat_b16(SV_VL4)); } + +svint8_t missing_za(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { + // expected-warning@+1 {{builtin call is not valid when calling from a function without active ZA state}} + return svread_hor_za8_s8_m(zd, pg, 0, slice_base); +} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp index 47c7210206b05..529d0d2d1e625 100644 --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp @@ -12,7 +12,7 @@ #include -void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { +void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} @@ -32,7 +32,7 @@ void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(1, slice, pg, svundef_s8()); } -void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { +void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} @@ -52,7 +52,7 @@ void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(2, slice, pg, svundef_s16()); } -void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { +void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} @@ -90,7 +90,7 @@ void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { SVE_ACLE_FUNC(svusmops_za32, _u8, _m,)(-1, pg, pg, svundef_u8(), svundef_s8()); } -void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { +void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} @@ -133,7 +133,7 @@ void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { SVE_ACLE_FUNC(svmops_za64, _f64, _m,)(-1, pg, pg, svundef_f64(), svundef_f64()); } -void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { +void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} @@ -153,14 +153,14 @@ void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming { SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(16, slice, pg, svundef_s8()); } -void test_range_0_255(svbool_t pg, void *ptr) __arm_streaming { +void test_range_0_255(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { // expected-error@+1 {{argument value 256 is outside the valid range [0, 255]}} SVE_ACLE_FUNC(svzero_mask_za,,,)(256); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 255]}} SVE_ACLE_FUNC(svzero_mask_za,,,)(-1); } -void test_constant(uint64_t u64, svbool_t pg, void *ptr) __arm_streaming { +void test_constant(uint64_t u64, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}} SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}} SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c index 7cfe9fdfbd24f..95bb6be2d2d34 100644 --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c @@ -6,21 +6,21 @@ #include __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) __arm_streaming { +void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming { +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming { +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za16(0, 0, pg, ptr); } __attribute__((target("+sme"))) -void undefined(svbool_t pg, void *ptr) { +void undefined(svbool_t pg, void *ptr) __arm_shared_za { svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}} } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index a59b7099d5adf..311c6b09dc790 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -385,6 +385,9 @@ class SVEEmitter { /// Emit all the range checks for the immediates. void createSMERangeChecks(raw_ostream &o); + /// Create a table for a builtin's requirement for PSTATE.ZA. + void createBuiltinZAState(raw_ostream &OS); + /// Create intrinsic and add it to \p Out void createIntrinsic(Record *R, SmallVectorImpl> &Out); @@ -1705,6 +1708,31 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { OS << "#endif\n\n"; } +void SVEEmitter::createBuiltinZAState(raw_ostream &OS) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) + createIntrinsic(R, Defs); + + std::map> DefsZAState; + + uint64_t IsSharedZAFlag = getEnumValueForFlag("IsSharedZA"); + for (auto &Def : Defs) { + bool HasZAState = Def->isFlagSet(IsSharedZAFlag); + DefsZAState[HasZAState].insert(Def->getMangledName()); + } + + OS << "#ifdef GET_SME_BUILTIN_HAS_ZA_STATE\n"; + + for (auto HasZA : {true, false}) { + auto Names = DefsZAState[HasZA]; + for (auto Name : Names) + OS << "case SME::BI__builtin_sme_" << Name << ":\n"; + OS << " return " << (HasZA ? "true" : "false") << ";\n"; + } + OS << "#endif\n\n"; +} + void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; @@ -1794,4 +1822,8 @@ void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) { void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME); } + +void EmitSmeBuiltinZAState(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createBuiltinZAState(OS); +} } // End namespace clang diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index 9043d90d7cb42..c1f2ca15b595c 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -92,6 +92,7 @@ enum ActionType { GenArmSmeBuiltinCG, GenArmSmeRangeChecks, GenArmSmeStreamingAttrs, + GenArmSmeBuiltinZAState, GenArmCdeHeader, GenArmCdeBuiltinDef, GenArmCdeBuiltinSema, @@ -260,6 +261,8 @@ cl::opt Action( "Generate arm_sme_sema_rangechecks.inc for clang"), clEnumValN(GenArmSmeStreamingAttrs, "gen-arm-sme-streaming-attrs", "Generate arm_sme_streaming_attrs.inc for clang"), + clEnumValN(GenArmSmeBuiltinZAState, "gen-arm-sme-builtin-za-state", + "Generate arm_sme_builtins_za_state.inc for clang"), clEnumValN(GenArmMveHeader, "gen-arm-mve-header", "Generate arm_mve.h for clang"), clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def", @@ -518,6 +521,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmSmeStreamingAttrs: EmitSmeStreamingAttrs(Records, OS); break; + case GenArmSmeBuiltinZAState: + EmitSmeBuiltinZAState(Records, OS); + break; case GenArmCdeHeader: EmitCdeHeader(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index 6ec51776d637c..35f2f04c1e818 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -112,6 +112,7 @@ void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltinZAState(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);