diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 5012874a08790..6312223f5d112 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -294,6 +294,7 @@ defm SVMOPS : ZAFPOuterProd<"mops">; multiclass MOP4 checks> { def _1x1 : Inst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>; + def _1x2 : Inst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{d}]", "vid2", t, MergeNone, i # "_1x2", [IsInOutZA, IsStreaming], checks>; } let SMETargetGuard = "sme2,sme-mop4" in { @@ -345,6 +346,10 @@ multiclass SUMOP4 che "vidu", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x1", [IsStreaming, IsInOutZA], checks>; + def _1x2 : SInst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{3}]", + "vid2.u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x2", + [IsStreaming, IsInOutZA], + checks>; } multiclass USMOP4 checks> { @@ -352,6 +357,10 @@ multiclass USMOP4 che "vidx", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x1", [IsStreaming, IsInOutZA], checks>; + def _1x2 : SInst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{3}]", + "vid2.x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x2", + [IsStreaming, IsInOutZA], + checks>; } let SMETargetGuard = "sme2,sme-mop4" in { diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b51106fa56759..35263541b67ae 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2420,8 +2420,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVSUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "sil", MergeNone, "aarch64_sve_sunpk_x2", [IsStreaming], []>; def SVUUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x2", [IsStreaming], []>; - def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; - def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; + def SVSUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; + def SVUUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c new file mode 100644 index 0000000000000..3c8bd372aa547 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c @@ -0,0 +1,466 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svmop4a_1x2_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x2_za32_s8_s8u10__SVInt8_t10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_s8_s8(svint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_s8_s8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x2_za32_s8_s8u10__SVInt8_t10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_s8_s8(svint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_s8_s8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x2_za32_u8_u8u11__SVUint8_t11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_u8_u8(svuint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_u8_u8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void 
+// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x2_za32_u8_u8u11__SVUint8_t11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_u8_u8(svuint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_u8_u8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x2_za32_s8_u8u10__SVInt8_t11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_s8_u8(svint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_s8_u8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x2_za32_s8_u8u10__SVInt8_t11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_s8_u8(svint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_s8_u8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x2_za32_u8_s8u11__SVUint8_t10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_u8_s8(svuint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_u8_s8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x2_za32_u8_s8u11__SVUint8_t10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x2.nxv16i8(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_u8_s8(svuint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_u8_s8)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za32_s16_s16u11__SVInt16_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x2.nxv8i16(i32 1, 
[[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_s16_s16(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_s16_s16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za32_s16_s16u11__SVInt16_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_s16_s16(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_s16_s16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za32_u16_u16u12__SVUint16_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_u16_u16(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_u16_u16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za32_u16_u16u12__SVUint16_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_u16_u16(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_u16_u16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za32_f16_f16u13__SVFloat16_t13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_f16_f16(svfloat16_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_f16_f16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za32_f16_f16u13__SVFloat16_t13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_f16_f16(svfloat16_t zn, svfloat16x2_t 
zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_f16_f16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4a_1x2_za32_bf16_bf16u14__SVBfloat16_t14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_bf16_bf16(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_bf16_bf16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4s_1x2_za32_bf16_bf16u14__SVBfloat16_t14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_bf16_bf16(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_bf16_bf16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za64_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za64_s16_s16u11__SVInt16_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za64_s16_s16(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za64,_s16_s16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za64_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za64_s16_s16u11__SVInt16_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za64_s16_s16(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za64,_s16_s16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za64_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za64_u16_u16u12__SVUint16_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za64_u16_u16(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za64,_u16_u16)(1, zn, 
zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za64_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za64_u16_u16u12__SVUint16_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za64_u16_u16(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za64,_u16_u16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za64_s16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za64_s16_u16u11__SVInt16_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za64_s16_u16(svint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za64,_s16_u16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za64_s16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za64_s16_u16u11__SVInt16_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za64_s16_u16(svint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za64,_s16_u16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za64_u16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za64_u16_s16u12__SVUint16_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za64_u16_s16(svuint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za64,_u16_s16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za64_u16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za64_u16_s16u12__SVUint16_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x2.nxv8i16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za64_u16_s16(svuint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za64,_u16_s16)(1, zn, zm); +} + + +// CHECK-LABEL: @test_svmop4a_1x2_za16_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
tail call void @llvm.aarch64.sme.mop4a.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za16_f16_f16u13__SVFloat16_t13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za16_f16_f16(svfloat16_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za16,_f16_f16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za16_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za16_f16_f16u13__SVFloat16_t13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv8f16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za16_f16_f16(svfloat16_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za16,_f16_f16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za32_f32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv4f32(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za32_f32_f32u13__SVFloat32_t13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv4f32(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_f32_f32(svfloat32_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za32,_f32_f32)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za32_f32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv4f32(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x2_za32_f32_f32u13__SVFloat32_t13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv4f32(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za32_f32_f32(svfloat32_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za32,_f32_f32)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za64_f64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv2f64(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x2_za64_f64_f64u13__SVFloat64_t13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv2f64(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za64_f64_f64(svfloat64_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za64,_f64_f64)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za64_f64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv2f64(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z29test_svmop4s_1x2_za64_f64_f64u13__SVFloat64_t13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv2f64(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za64_f64_f64(svfloat64_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za64,_f64_f64)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x2_za16_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4a_1x2_za16_bf16_bf16u14__SVBfloat16_t14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za16_bf16_bf16(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a,_1x2_,za16,_bf16_bf16)(1, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x2_za16_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4s_1x2_za16_bf16_bf16u14__SVBfloat16_t14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x2.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x2_za16_bf16_bf16(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s,_1x2_,za16,_bf16_bf16)(1, zn, zm); +} diff --git a/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp index 556cb1742dbbd..47ce2a0f5f80f 100644 --- a/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp @@ -6,19 +6,19 @@ #include -void tests_mop4_imm_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_s8_s8_1x1(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} return; } -void tests_mop4_imm_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_u8_u8_1x1(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} return; } -void tests_mop4_imm_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_s8_u8_1x1(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4a_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument 
value 18446744073709551615 is outside the valid range [0, 3]}} @@ -26,7 +26,7 @@ void tests_mop4_imm_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout return; } -void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_s16_s16_1x1(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} @@ -35,7 +35,7 @@ void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_in return; } -void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_u16_u16_1x1(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} @@ -44,7 +44,7 @@ void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_ return; } -void tests_mop4_imm_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_s16_u16_1x1(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} svmop4s_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} svmop4a_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} @@ -52,7 +52,7 @@ void tests_mop4_imm_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_i return; } -void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_f16_f16_1x1(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} @@ -61,7 +61,7 @@ void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __ar return; } -void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_bf16_bf16_1x1(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} @@ -71,14 +71,98 @@ void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming } -void tests_mop4_imm_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_f32_f32_1x1(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svmop4s_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is 
outside the valid range [0, 3]}} return; } -void tests_mop4_imm_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { +void tests_mop4_imm_f64_f64_1x1(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { svmop4a_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} svmop4s_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} return; } + +void tests_mop4_imm_s8_s8_1x2(svint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_u8_u8_1x2(svuint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_s8_u8_1x2(svint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_u8_s8_1x2(svuint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_u8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_u8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_s16_s16_1x2(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x2_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x2_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_u16_u16_1x2(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x2_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x2_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_s16_u16_1x2(svint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x2_za64_s16_u16(-1, zn, 
zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_u16_s16_1x2(svuint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za64_u16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x2_za64_u16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_f16_f16_1x2(svfloat16_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x2_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4s_1x2_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + return; +} + +void tests_mop4_imm_bf16_bf16_1x2(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x2_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4s_1x2_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + return; +} + +void tests_mop4_imm_f32_f32_1x2(svfloat32_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x2_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_f64_f64_1x2(svfloat64_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x2_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index e226987b4844b..200f57960fff8 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1043,7 +1043,10 @@ std::string Intrinsic::replaceTemplatedArgs(std::string Name, TypeSpec TS, case '1': case '2': case '3': - T = SVEType(TS, Proto[C - '0']); + // Extract the modifier before passing to SVEType to handle numeric + // modifiers + auto [Mod, NumVectors] = getProtoModifier(Proto, (C - '0')); + T = SVEType(TS, Mod); break; } diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index fe8769154b1da..f08bdf78b5f96 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3070,11 +3070,19 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; + class SME_OuterProduct_QuarterTile_Single_Multi + : 
DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; + // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S foreach mode = ["s", "a"] in { foreach za = ["", "_za64"] in { foreach ty = ["s", "u", "su", "us"] in { def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x1" : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x2" : SME_OuterProduct_QuarterTile_Single_Multi; } } } @@ -3083,9 +3091,10 @@ let TargetPrefix = "aarch64" in { foreach mode = ["s", "a"] in { foreach wide = ["", "_wide"] in { def int_aarch64_sme_mop4 # mode # wide # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_mop4 # mode # wide # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi; } } - + class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 54c63ead059ae..87a8f068083d5 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -270,6 +270,9 @@ class SME2_ZA_Tile_Vec_Single_Single_Pat(name # _PSEUDO) $tile, $Zn, $Zm)>; +class SME2_ZA_Tile_Vec_Multi_Pat + : Pat<(intrinsic imm_ty:$tile, vt:$Zn, vt:$Zm1, vt:$Zm2), + (!cast(name # _PSEUDO) $tile, $Zn, (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// @@ -623,7 +626,12 @@ multiclass sme_quarter_outer_product_i8_i32; def _MZ2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 1}, subtr, - ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, mnemonic>; + ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, mnemonic>, SMEPseudo2Instr; + + def NAME # _MZ2Z_BToS_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_3, nxv16i8>; + def _M2Z2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 1}, subtr, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, mnemonic>; } @@ -639,7 +647,12 @@ multiclass sme_quarter_outer_product_i16_i32; def _MZ2Z_HToS : sme_quarter_outer_product_i16_i32; + ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>, SMEPseudo2Instr; + + def NAME # _MZ2Z_HToS_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_3, nxv8i16>; + def _M2Z2Z_HToS : sme_quarter_outer_product_i16_i32; } @@ -655,7 +668,12 @@ multiclass sme_quarter_outer_product_i64; def _MZ2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 1}, subtr, - ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>; + ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>, SMEPseudo2Instr; + + def NAME # _MZ2Z_HtoD_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_7, nxv8i16>; + def _M2Z2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 1}, subtr, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>; } @@ -5509,7 +5527,12 @@ multiclass sme2_bfmop4as_widening { def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; // Single and multiple vectors - def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>, 
SMEPseudo2Instr; + + def NAME # _MZ2Z_S_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_3, nxv8bf16>; + // Multiple vectors def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; @@ -5660,7 +5683,11 @@ multiclass sme2_fmop4as_fp16_non_widening { def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; // Single and multiple vectors - def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>, SMEPseudo2Instr; + + def NAME # _MZ2Z_H_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_1, nxv8f16>; // Multiple vectors def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; @@ -5736,7 +5763,11 @@ multiclass sme2_bfmop4as_non_widening { def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; // Single and multiple vectors - def _MZ2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + def _MZ2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>, SMEPseudo2Instr; + + def NAME # _MZ2Z_H_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_1, nxv8bf16>; // Multiple vectors def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; @@ -5777,7 +5808,11 @@ multiclass sme2_fmop4as_fp32_non_widening { def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>; // Single and multiple vectors - def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, ZZ_s_mul_r_Hi>; + def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, ZZ_s_mul_r_Hi>, SMEPseudo2Instr; + + def NAME # _MZ2Z_S_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_3, nxv4f32>; // Multiple vectors def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>; @@ -5818,7 +5853,11 @@ multiclass sme2_fmop4as_fp64_non_widening { def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; // Single and multiple vectors - def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>; + def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>, SMEPseudo2Instr; + + def NAME # _MZ2Z_D_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_7, nxv2f64>; // Multiple vectors def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>; @@ -5859,7 +5898,11 @@ multiclass sme2_fmop4as_fp16_fp32_widening { def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; // Single and multiple vectors - def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, 
ZZ_h_mul_r_Hi>, SMEPseudo2Instr; + + def NAME # _MZ2Z_HtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_Vec_Multi_Pat(op # "_1x2"), timm32_0_3, nxv8f16>; // Multiple vectors def _M2Z2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x2.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x2.ll new file mode 100644 index 0000000000000..f3540458dcaa6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x2.ll @@ -0,0 +1,462 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +; Widening +define void @mop4a_za32_s8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4a.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_s8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_u8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umop4a za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umop4a.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_u8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umop4s za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umop4s.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_s8_u8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_s8_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4a za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4a.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_s8_u8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_s8_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4s za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4s.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_u8_s8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_u8_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4a za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4a.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_u8_s8( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_u8_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4s za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4s.wide.1x2.nxv16i8(i32 0, %zn, %zm1, %zm2) + ret void +} + + +define void @mop4a_za32_s16( %zn, %zm1, %zm2) #0 { +; 
CHECK-LABEL: mop4a_za32_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4a.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_s16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_u16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umop4a za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umop4a.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_u16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umop4s za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umop4s.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_f16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x2.nxv8f16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_f16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8f16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_bf16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4a za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x2.nxv8bf16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_bf16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za0.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8bf16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za64_s16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za64_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4a.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_s16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za64_u16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za64_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umop4a za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sme.umop4a.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_u16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umop4s za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umop4s.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za64_s16_u16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za64_s16_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4a za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4a.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_s16_u16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_s16_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4s za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4s.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za64_u16_s16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za64_u16_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4a za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4a.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_u16_s16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_u16_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4s za0.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4s.za64.wide.1x2.nxv8i16(i32 0, %zn, %zm1, %zm2) + ret void +} + +; Non-widening +define void @mop4a_za16_f16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.h, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x2.nxv8f16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za16_f16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za0.h, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv8f16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za32_f32( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.s, z0.s, { z24.s, z25.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x2.nxv4f32(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_f32( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za0.s, z0.s, { z24.s, z25.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv4f32(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za64_f64( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.d, z0.d, { z24.d, z25.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x2.nxv2f64(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_f64( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_f64: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za0.d, z0.d, { z24.d, z25.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv2f64(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4a_za16_bf16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4a_za16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4a za0.h, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x2.nxv8bf16(i32 0, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za16_bf16( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za0.h, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv8bf16(i32 0, %zn, %zm1, %zm2) + ret void +} + +; Tile limits + +define void @mop4s_za32_s8_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_s8_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za3.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv16i8(i32 3, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_s16_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_s16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za3.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x2.nxv8i16(i32 3, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_f16_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_f16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za3.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8f16(i32 3, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_bf16_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_bf16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za3.s, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x2.nxv8bf16(i32 3, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_s16_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_s16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za7.d, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.za64.wide.1x2.nxv8i16(i32 7, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za64_f64_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za64_f64_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za7.d, z0.d, { z24.d, z25.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv2f64(i32 7, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za32_f32_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za32_f32_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za3.s, z0.s, { z24.s, z25.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv4f32(i32 3, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za16_f16_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za16_f16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.h, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sme.mop4s.1x2.nxv8f16(i32 1, %zn, %zm1, %zm2) + ret void +} + +define void @mop4s_za16_bf16_limit( %zn, %zm1, %zm2) #0 { +; CHECK-LABEL: mop4s_za16_bf16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za1.h, z0.h, { z24.h, z25.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x2.nxv8bf16(i32 1, %zn, %zm1, %zm2) + ret void +} + +attributes #0 = {nounwind "target-features" = "+sme-i16i64,+sme-f64f64,+sme-b16b16,+sme2p1,+bf16,+sme-f16f16,+sme-mop4" }
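
A brief usage sketch (illustrative, not part of the patch): the new 1x2 forms take a tile immediate, one SVE vector, and a two-vector tuple, and may only be called from streaming code with ZA state. The caller name below is hypothetical; the intrinsic name, argument order, attributes, and the [0, 3] tile range for the za32/s8 form are taken from the new tests above, and <arm_sme.h> is assumed as the ACLE header (the #include in the new test is truncated in this listing).

// Hypothetical caller of one of the new 1x2 quarter-tile outer-product
// intrinsics added by this patch. Build with the SME2 and sme-mop4 target
// features enabled, as in the RUN lines of the new CodeGen test.
#include <arm_sme.h>

void accumulate_s8_1x2(svint8_t zn, svint8x2_t zm)
    __arm_streaming __arm_inout("za") {
  // Accumulate the widened outer product of zn with the vector pair zm
  // into ZA tile 0; the tile immediate must be in [0, 3] for za32/s8.
  svmop4a_1x2_za32_s8_s8(0, zn, zm);
}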