-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Clang][LLVM] Implement single-multi vectors MOP4{A/S} #128854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-ir Author: Virginia Cangelosi (virginia-cangelosi) ChangesImplement all single-multi {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on #127797 Patch is 117.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128854.diff 10 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 288a8c04c217f..8bffe6eb1183b 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -376,6 +376,33 @@ let SMETargetGuard = "sme2" in {
// Outer product and accumulate/subtract
//
+multiclass MOP4<string name, string n, string t, string i, string wide> {
+ def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+ def NAME # "_1x2" : Inst<"svmop4" # name # "_1x2_" # n # "[_{d}_{d}]", "vid2", t, MergeNone, i # wide # "_1x2", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+}
+
+multiclass SUMOP4<string s, string za, string t, string i> {
+ def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]",
+ "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x1",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_3>]>;
+ def _1x2 : SInst<"svmop4" # s # "[_1x2_]" # za # "[_{2}_{3}]",
+ "vid2.u", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x2",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_3>]>;
+}
+
+multiclass USMOP4<string s, string za, string t, string i> {
+ def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]",
+ "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x1",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_3>]>;
+ def _1x2 : SInst<"svmop4" # s # "[_1x2_]" # za # "[_{2}_{3}]",
+ "vid2.x", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x2",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_3>]>;
+}
+
let SMETargetGuard = "sme2" in {
def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
@@ -387,6 +414,25 @@ let SMETargetGuard = "sme2" in {
def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+ defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide">;
+ defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide">;
+
+ defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide">;
+ defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide">;
+
+ defm SVFMOP4A_HtoS : MOP4<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">;
+ defm SVFMOP4S_HtoS : MOP4<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">;
+ defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", "">;
+ defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", "">;
+
+ defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">;
+ defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">;
+
+ defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "">;
+ defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "">;
+ defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs", "">;
+ defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "">;
+
// VERTICAL DOT-PRODUCT
def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>;
def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>;
@@ -437,6 +483,15 @@ let SMETargetGuard = "sme2" in {
}
let SMETargetGuard = "sme2,sme-i16i64" in {
+ defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide">;
+ defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide">;
+ defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide">;
+ defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide">;
+ defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64">;
+ defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64">;
+ defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64">;
+ defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64">;
+
def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>;
def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>;
@@ -473,6 +528,9 @@ let SMETargetGuard = "sme2" in {
}
let SMETargetGuard = "sme2,sme-f64f64" in {
+ defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", "">;
+ defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", "">;
+
def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>;
@@ -490,6 +548,9 @@ let SMETargetGuard = "sme2,sme-f64f64" in {
}
let SMETargetGuard = "sme-f16f16" in {
+ defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", "">;
+ defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", "">;
+
def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>;
@@ -507,6 +568,9 @@ let SMETargetGuard = "sme-f16f16" in {
}
let SMETargetGuard = "sme-b16b16" in {
+ defm SVBMOP4A_H : MOP4<"a", "za16", "bf", "aarch64_sme_mop4a", "">;
+ defm SVBMOP4S_H : MOP4<"s", "za16", "bf", "aarch64_sme_mop4s", "">;
+
def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>;
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b20383e72e66a..c5cf478ea6e9b 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2420,8 +2420,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
def SVSUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "sil", MergeNone, "aarch64_sve_sunpk_x2", [IsStreaming], []>;
def SVUUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x2", [IsStreaming], []>;
- def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>;
- def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
+ def SVSUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>;
+ def SVUUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
}
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c
new file mode 100644
index 0000000000000..34a9633374d3f
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c
@@ -0,0 +1,465 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sme.h>
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_s8u10__SVInt8_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_s8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_s8u10__SVInt8_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_s8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_u8u11__SVUint8_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_u8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_u8u11__SVUint8_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_s16_s16u11__SVInt16_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_s16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_s16_s16u11__SVInt16_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_u16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_u16_u16u12__SVUint16_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_u16_u16,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_u16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_u16_u16u12__SVUint16_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_u16_u16,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_f16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_f16_f16u13__SVFloat16_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4a_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4a_1x1_za32,_f16_f16,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4s_1x1_za32_f16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_f16_f16u13__SVFloat16_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svmop4s_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svmop4s_1x1_za32,_f16_f16,)(3, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmop4a_1x1_za32_bf16_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEX...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
4c0e200
to
d3367db
Compare
d3367db
to
bd15cb4
Compare
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
bd15cb4
to
8fb286f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good. I would definitely highlight in the PR description the change to SVEEmitter you made though and the reasoning behind it.
@@ -3835,7 +3866,7 @@ let TargetPrefix = "aarch64" in { | |||
def int_aarch64_sme_luti4_lane_zt_x2 | |||
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], | |||
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>; | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
trailing whitespace added I think ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think removed by my editor deleting white space
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
82d0f96
to
8fb286f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
8fb286f
to
73b118d
Compare
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Virginia, do you mind also having a look on the C tests?
// CPP-CHECK-NEXT: ret void | ||
// | ||
void test_svmop4a_1x2_za32_s8_s8(svint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { | ||
SME_ACLE_FUNC(svmop4a_1x2_za32,_s8_s8,)(1, zn, zm); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be like:
(svmop4a,_1x2,_za32,_s8_s8)
For that we need to change the definition in :
#ifdef SME_OVERLOADED_FORMS
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3, A4_UNUSED) A1##A3
#else
#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3##A4
#endif
73b118d
to
1e2e616
Compare
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
1e2e616
to
801f1d5
Compare
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Virginia!
801f1d5
to
01febce
Compare
Implement all single-multi {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#127797 This patch updates the semantics of template arguments in intrinsic names for clarity and ease of use. Previously, template argument numbers indicated which character in the prototype string determined the final type suffix, which was confusing—especially for intrinsics using multiple prototype modifiers per operand (e.g., intrinsics operating on arrays of vectors). The number had to reference the correct character in the prototype (e.g., the ‘u’ in “2.u”), making the system cumbersome and error-prone. With this patch, template argument numbers now refer to the operand number that determines the final type suffix, providing a more intuitive and consistent approach.
Fix some of the recently-added tests (PRs llvm#127797, llvm#128854, llvm#129226 and llvm#129230) which were incorrectly defined.
Implement all single-multi {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files.
This PR depends on #127797
This patch updates the semantics of template arguments in intrinsic names for clarity and ease of use. Previously, template argument numbers indicated which character in the prototype string determined the final type suffix, which was confusing—especially for intrinsics using multiple prototype modifiers per operand (e.g., intrinsics operating on arrays of vectors). The number had to reference the correct character in the prototype (e.g., the ‘u’ in “2.u”), making the system cumbersome and error-prone.
With this patch, template argument numbers now refer to the operand number that determines the final type suffix, providing a more intuitive and consistent approach.