Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" #91597

momchil-velikov · 2024-05-09T14:03:23Z

Reverts #88266 due to test failures

error: 'expected-error' diagnostics seen but not expected:
(frontend): '-fsyntax-only' action ignored; '-emit-llvm' action specified previously

…accumula…" This reverts commit e88ba6d.

llvmbot · 2024-05-09T14:03:52Z

@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-ir

Author: Momchil Velikov (momchil-velikov)

Changes

Reverts llvm/llvm-project#88266 due to test failures

error: 'expected-error' diagnostics seen but not expected:
(frontend): '-fsyntax-only' action ignored; '-emit-llvm' action specified previously

Patch is 31.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91597.diff

6 Files Affected:

(modified) clang/include/clang/Basic/arm_sme.td (-10)
(removed) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c (-193)
(removed) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c (-29)
(modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+1-1)
(modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+8-8)
(removed) llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll (-148)

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 000bd97a4b25d..1ac6d5170ea28 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,16 +298,6 @@ multiclass ZAAddSub<string n_suffix> {
     def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>;
     def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>;
   }
-
-  let TargetGuard = "sme-f16f16|sme-f8f16" in {
-    def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>;
-    def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
-  }
-
-  let TargetGuard = "sme2,b16b16" in {
-    def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>;
-    def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
-  }
 }
 
 defm SVADD : ZAAddSub<"add">;
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
deleted file mode 100644
index d98427fac610b..0000000000000
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
+++ /dev/null
@@ -1,193 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
-// RUN: %clang_cc1                               -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1                        -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature  +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS        -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature  +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
-
-// RUN: %clang_cc1                               -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sme.h>
-
-#ifdef SVE_OVERLOADED_FORMS
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
-#else
-#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
-#endif
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svadd_za16,_f16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x4_f16j13svfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svadd_za16,_f16,_vg1x4)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x2_f16j13svfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svsub_za16,_f16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x4_f16j13svfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svsub_za16,_f16,_vg1x4)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x2_bf16j14svbfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svadd_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svadd_za16,_bf16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x4_bf16j14svbfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svadd_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svadd_za16,_bf16,_vg1x4)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x2_bf16j14svbfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svsub_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svsub_za16,_bf16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x4_bf16j14svbfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-CXX-NEXT:    ret void
-//
-void test_svsub_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") {
-  SVE_ACLE_FUNC(svsub_za16,_bf16,_vg1x4)(slice, zn);
-}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
deleted file mode 100644
index 0eeeac5a50469..0000000000000
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify -emit-llvm %s
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sme.h>
-
-void test_features(uint32_t slice, svfloat16x2_t zn2, svfloat16x4_t zn4,
-                   svbfloat16x2_t bzn2, svbfloat16x4_t bzn4) __arm_streaming __arm_inout("za") {
-  // expected-error@+1 {{'svadd_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}}
-  svadd_za16_f16_vg1x2(slice, zn2);
-  // expected-error@+1 {{'svadd_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}}
-  svadd_za16_f16_vg1x4(slice, zn4);
-  // expected-error@+1 {{'svsub_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}}
-  svsub_za16_f16_vg1x2(slice, zn2);
-  // expected-error@+1 {{'svsub_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}}
-  svsub_za16_f16_vg1x4(slice, zn4);
-
-  // expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
-  svadd_za16_bf16_vg1x2(slice, bzn2);
-  // expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
-  svadd_za16_bf16_vg1x4(slice, bzn4);
-  // expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
-  svsub_za16_bf16_vg1x2(slice, bzn2);
-  // expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
-  svsub_za16_bf16_vg1x4(slice, bzn4);
-}
-
-
-
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 04571faf13067..e31e00a9c76f3 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3481,7 +3481,7 @@ let TargetPrefix = "aarch64" in {
   // Multi-vector add/sub and accum...
[truncated]

…3cc99a1c9 Local branch amd-gfx e8d3cc9 Merged main:96568f3539d8a72432a03257a7a8ed2f36014b59 into amd-gfx:112cc7959e1b Remote branch main 139e0aa Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" (llvm#91597)

Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector …

0f71196

…accumula…" This reverts commit e88ba6d.

llvmbot added clang Clang issues not falling into any other category backend:AArch64 clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir labels May 9, 2024

momchil-velikov merged commit 139e0aa into main May 9, 2024

momchil-velikov deleted the revert-88266-sme-bfadd branch May 9, 2024 14:03

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" #91597

Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" #91597

Uh oh!

momchil-velikov commented May 9, 2024

Uh oh!

llvmbot commented May 9, 2024 •

edited

Loading

Uh oh!

Uh oh!

Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" #91597

Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" #91597

Uh oh!

Conversation

momchil-velikov commented May 9, 2024

Uh oh!

llvmbot commented May 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

llvmbot commented May 9, 2024 •

edited

Loading