diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 000bd97a4b25d..1ac6d5170ea28 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,16 +298,6 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } - - let TargetGuard = "sme-f16f16|sme-f8f16" in { - def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; - def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; - } - - let TargetGuard = "sme2,b16b16" in { - def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; - def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; - } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c deleted file mode 100644 index d98427fac610b..0000000000000 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c +++ /dev/null @@ -1,193 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 -#endif - -// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: ret void -// -void test_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svadd_za16,_f16,_vg1x2)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x4_f16j13svfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-CXX-NEXT: ret void -// -void test_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svadd_za16,_f16,_vg1x4)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x2_f16j13svfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: ret void -// -void test_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svsub_za16,_f16,_vg1x2)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x4_f16j13svfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-CXX-NEXT: ret void -// -void test_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svsub_za16,_f16,_vg1x4)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x2_bf16j14svbfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: ret void -// -void test_svadd_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svadd_za16,_bf16,_vg1x2)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x4_bf16j14svbfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-CXX-NEXT: ret void -// -void test_svadd_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svadd_za16,_bf16,_vg1x4)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x2_bf16j14svbfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: ret void -// -void test_svsub_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svsub_za16,_bf16,_vg1x2)(slice, zn); -} - -// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x4_bf16j14svbfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-CXX-NEXT: ret void -// -void test_svsub_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svsub_za16,_bf16,_vg1x4)(slice, zn); -} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c deleted file mode 100644 index 0eeeac5a50469..0000000000000 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c +++ /dev/null @@ -1,29 +0,0 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify -emit-llvm %s - -// REQUIRES: aarch64-registered-target - -#include - -void test_features(uint32_t slice, svfloat16x2_t zn2, svfloat16x4_t zn4, - svbfloat16x2_t bzn2, svbfloat16x4_t bzn4) __arm_streaming __arm_inout("za") { - // expected-error@+1 {{'svadd_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}} - svadd_za16_f16_vg1x2(slice, zn2); - // expected-error@+1 {{'svadd_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}} - svadd_za16_f16_vg1x4(slice, zn4); - // expected-error@+1 {{'svsub_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}} - svsub_za16_f16_vg1x2(slice, zn2); - // expected-error@+1 {{'svsub_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}} - svsub_za16_f16_vg1x4(slice, zn4); - - // expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme2,b16b16}} - svadd_za16_bf16_vg1x2(slice, bzn2); - // expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme2,b16b16}} - svadd_za16_bf16_vg1x4(slice, bzn4); - // expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme2,b16b16}} - svsub_za16_bf16_vg1x2(slice, bzn2); - // expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme2,b16b16}} - svsub_za16_bf16_vg1x4(slice, bzn4); -} - - - diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 04571faf13067..e31e00a9c76f3 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3481,7 +3481,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector add/sub and accumulate into ZA // foreach intr = ["add", "sub"] in { - foreach za = ["za16","za32", "za64"] in { + foreach za = ["za32", "za64"] in { def int_aarch64_sme_ # intr # _ # za # _vg1x2 : SME2_ZA_Write_VG2_Intrinsic; def int_aarch64_sme_ # intr # _ # za # _vg1x4 : SME2_ZA_Write_VG4_Intrinsic; } diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 5102c602d54e1..574178c8d5244 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -793,10 +793,10 @@ defm LUTI4_S_4ZTZI : sme2p1_luti4_vector_vg4_index<"luti4">; } let Predicates = [HasSMEF16F16orSMEF8F16] in { -defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_add_za16_vg1x2>; -defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_add_za16_vg1x4>; -defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_sub_za16_vg1x2>; -defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_sub_za16_vg1x4>; +defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; +defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; +defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; +defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; } let Predicates = [HasSMEF16F16] in { @@ -822,10 +822,10 @@ defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; } let Predicates = [HasSME2, HasB16B16] in { -defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_add_za16_vg1x2>; -defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_add_za16_vg1x4>; -defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_sub_za16_vg1x2>; -defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_sub_za16_vg1x4>; +defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16>; defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16>; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll deleted file mode 100644 index e7a6c0d6c549b..0000000000000 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll +++ /dev/null @@ -1,148 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -verify-machineinstrs < %s | FileCheck %s - -target triple = "aarch64-linux" - -define void @add_f16_vg1x2(i32 %slice, %zn0, %zn1) #0 { -; CHECK-LABEL: add_f16_vg1x2: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: fadd za.h[w8, 0, vgx2], { z0.h, z1.h } -; CHECK-NEXT: fadd za.h[w8, 7, vgx2], { z0.h, z1.h } -; CHECK-NEXT: ret - call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 %slice, %zn0, %zn1) - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 %slice.7, %zn0, %zn1) - ret void -} - -define void @add_f16_vg1x4(i32 %slice, %zn0, %zn1, -; CHECK-LABEL: add_f16_vg1x4: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: fadd za.h[w8, 0, vgx4], { z0.h - z3.h } -; CHECK-NEXT: fadd za.h[w8, 7, vgx4], { z0.h - z3.h } -; CHECK-NEXT: ret - %zn2, %zn3) #1 { - call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 %slice, %zn0, %zn1, - %zn2, %zn3); - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 %slice.7, %zn0, %zn1, - %zn2, %zn3); - ret void -} - -define void @sub_f16_vg1x2(i32 %slice, %zn0, %zn1) #1 { -; CHECK-LABEL: sub_f16_vg1x2: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: fsub za.h[w8, 0, vgx2], { z0.h, z1.h } -; CHECK-NEXT: fsub za.h[w8, 7, vgx2], { z0.h, z1.h } -; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 %slice, %zn0, %zn1) - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 %slice.7, %zn0, %zn1) - ret void -} - -define void @sub_f16_vg1x4(i32 %slice, %zn0, %zn1, -; CHECK-LABEL: sub_f16_vg1x4: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: fsub za.h[w8, 0, vgx4], { z0.h - z3.h } -; CHECK-NEXT: fsub za.h[w8, 7, vgx4], { z0.h - z3.h } -; CHECK-NEXT: ret - %zn2, %zn3) #0 { - call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 %slice, %zn0, %zn1, - %zn2, %zn3); - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 %slice.7, %zn0, %zn1, - %zn2, %zn3); - ret void -} - -define void @add_bf16_vg1x2(i32 %slice, %zn0, %zn1) #2 { -; CHECK-LABEL: add_bf16_vg1x2: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h } -; CHECK-NEXT: bfadd za.h[w8, 7, vgx2], { z0.h, z1.h } -; CHECK-NEXT: ret - call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 %slice, %zn0, %zn1) - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 %slice.7, %zn0, %zn1) - ret void -} - -define void @add_bf16_vg1x4(i32 %slice, %zn0, %zn1, -; CHECK-LABEL: add_bf16_vg1x4: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h } -; CHECK-NEXT: bfadd za.h[w8, 7, vgx4], { z0.h - z3.h } -; CHECK-NEXT: ret - %zn2, %zn3) #2 { - call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 %slice, %zn0, %zn1, - %zn2, %zn3); - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 %slice.7, %zn0, %zn1, - %zn2, %zn3); - ret void -} - -define void @sub_bf16_vg1x2(i32 %slice, %zn0, %zn1) #2 { -; CHECK-LABEL: sub_bf16_vg1x2: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h } -; CHECK-NEXT: bfsub za.h[w8, 7, vgx2], { z0.h, z1.h } -; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 %slice, %zn0, %zn1) - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 %slice.7, %zn0, %zn1) - ret void -} - -define void @sub_bf16_vg1x4(i32 %slice, %zn0, %zn1, -; CHECK-LABEL: sub_bf16_vg1x4: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h } -; CHECK-NEXT: bfsub za.h[w8, 7, vgx4], { z0.h - z3.h } -; CHECK-NEXT: ret - %zn2, %zn3) #2 { - call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 %slice, %zn0, %zn1, - %zn2, %zn3); - %slice.7 = add i32 %slice, 7 - call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 %slice.7, %zn0, %zn1, - %zn2, %zn3); - ret void -} - -attributes #0 = { nounwind "target-features"="+sme-f16f16" } -attributes #1 = { nounwind "target-features"="+sme-f8f16" } -attributes #2 = { nounwind "target-features"="+sme2,+bf16,+b16b16" }