-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics #73304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Matthew Devereau (MDevereau) ChangesPatch by: Hassnaa Hamdi <[email protected]> Patch is 24.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73304.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index b5655afdf419ecf..1174e30cb0885e1 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,3 +298,11 @@ multiclass ZAAddSub<string n_suffix> {
defm SVADD : ZAAddSub<"add">;
defm SVSUB : ZAAddSub<"sub">;
+
+//
+// lookup table expand one register
+//
+let TargetGuard = "sme2" in {
+ def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
new file mode 100644
index 000000000000000..ebabbfc815c1dfe
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
@@ -0,0 +1,96 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+
+// CHECK-LABEL: @test_svluti2_lane_zt_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_u8(0, zn, 2);
+}
+
+
+// CHECK-LABEL: @test_svluti2_lane_zt_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti2_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_s8(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti2_lane_zt_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti2_lane_zt_u16(svuint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_u16(0, zn, 2);
+}
+
+
+// CHECK-LABEL: @test_svluti2_lane_zt_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti2_lane_zt_s16(svint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_s16(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti2_lane_zt_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svluti2_lane_zt_u32(svuint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_u32(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti2_lane_zt_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svluti2_lane_zt_s32(svint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_s32(0, zn, 2);
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
new file mode 100644
index 000000000000000..c6b9b7c82757562
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
@@ -0,0 +1,95 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+
+// CHECK-LABEL: @test_svluti4_lane_zt_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_u8(0, zn, 2);
+}
+
+
+// CHECK-LABEL: @test_svluti4_lane_zt_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti4_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_s8(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti4_lane_zt_u16(svuint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_u16(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti4_lane_zt_s16(svint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_s16(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svluti4_lane_zt_u32(svuint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_u32(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svluti4_lane_zt_s32(svint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_s32(0, zn, 2);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
new file mode 100644
index 000000000000000..eb3862df246318b
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+void test_svluti2_lane_zt(svuint8_t zn_u8, svuint16_t zn_u16, svuint32_t zn_u32) __arm_streaming __arm_shared_za __arm_preserves_za {
+ // Test Reg Offset
+ svluti2_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti2_lane_zt_u8(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+ // Test Reg Offset
+ svluti2_lane_zt_u16(1, zn_u16, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti2_lane_zt_u16(0, zn_u16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+ // Test Reg Offset
+ svluti2_lane_zt_u32(1, zn_u32, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti2_lane_zt_u32(0, zn_u32, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+}
+void test_svluti4_lane_zt(svuint8_t zn_u8, svuint16_t zn_u16, svuint32_t zn_u32) __arm_streaming __arm_shared_za __arm_preserves_za {
+ // Test Reg Offset
+ svluti4_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti4_lane_zt_u8(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ // Test Reg Offset
+ svluti4_lane_zt_u16(1, zn_u16, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti4_lane_zt_u16(0, zn_u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ // Test Reg Offset
+ svluti4_lane_zt_u32(1, zn_u32, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti4_lane_zt_u32(0, zn_u32, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1b701a91455c946..096b5764a25cac8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3459,4 +3459,13 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic;
def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic;
+ //
+ // Lookup table expand one register
+ //
+ def int_aarch64_sme_luti2_lane_zt
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
+ def int_aarch64_sme_luti4_lane_zt
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7617dccdeee397f..7d657ea1d0047b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -326,9 +326,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
- template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
+ template <unsigned BaseReg, unsigned Max>
+ bool ImmToTile(SDValue N, SDValue &Imm) {
if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
uint64_t C = CI->getZExtValue();
+
+ if (C > Max)
+ return false;
+
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ed64a7b4984c17c..24ba9dd95004c6f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
Reserved.set(SubReg);
}
+ if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
+ for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
+ SubReg.isValid(); ++SubReg)
+ Reserved.set(*SubReg);
+ }
+
markSuperRegs(Reserved, AArch64::FPCR);
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index bb9464a8d2e1cf2..01a9fefac7eeadb 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -547,11 +547,11 @@ def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>;
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;
-defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2">;
+defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2", int_aarch64_sme_luti2_lane_zt>;
defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">;
defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">;
-defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">;
+defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4", int_aarch64_sme_luti4_lane_zt>;
defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">;
defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 6c9b1f11a4decde..370980ba686c9d6 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
-def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
-def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
-def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
-def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>;
+def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>;
+def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>;
+def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>;
+def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>;
+def imm_to_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>;
def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
@@ -3202,28 +3203,42 @@ class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
string mnemonic>
- : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB, mnemonic> {
+ : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB32b_timm, mnemonic> {
bits<4> i;
let Inst{17-14} = i;
}
-multiclass sme2_luti2_vector_index<string mnemonic> {
+multiclass sme2_luti2_vector_index<string mnemonic, SDPatternOperator intrinsic> {
def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>;
def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;
+
+ def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv8i16:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv8i16:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv4i32:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _S) $zt, nxv4i32:$zn, (i32 VectorIndexB32b_timm:$imm))>;
}
class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
string mnemoni...
[truncated]
|
@llvm/pr-subscribers-backend-aarch64 Author: Matthew Devereau (MDevereau) ChangesPatch by: Hassnaa Hamdi <[email protected]> Patch is 24.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73304.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index b5655afdf419ecf..1174e30cb0885e1 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,3 +298,11 @@ multiclass ZAAddSub<string n_suffix> {
defm SVADD : ZAAddSub<"add">;
defm SVSUB : ZAAddSub<"sub">;
+
+//
+// lookup table expand one register
+//
+let TargetGuard = "sme2" in {
+ def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
new file mode 100644
index 000000000000000..ebabbfc815c1dfe
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
@@ -0,0 +1,96 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+
+// CHECK-LABEL: @test_svluti2_lane_zt_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_u8(0, zn, 2);
+}
+
+
+// CHECK-LABEL: @test_svluti2_lane_zt_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti2_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_s8(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti2_lane_zt_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti2_lane_zt_u16(svuint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_u16(0, zn, 2);
+}
+
+
+// CHECK-LABEL: @test_svluti2_lane_zt_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti2_lane_zt_s16(svint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_s16(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti2_lane_zt_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svluti2_lane_zt_u32(svuint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_u32(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti2_lane_zt_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svluti2_lane_zt_s32(svint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti2_lane_zt_s32(0, zn, 2);
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
new file mode 100644
index 000000000000000..c6b9b7c82757562
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
@@ -0,0 +1,95 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+
+// CHECK-LABEL: @test_svluti4_lane_zt_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_u8(0, zn, 2);
+}
+
+
+// CHECK-LABEL: @test_svluti4_lane_zt_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti4_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_s8(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti4_lane_zt_u16(svuint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_u16(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti4_lane_zt_s16(svint16_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_s16(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svluti4_lane_zt_u32(svuint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_u32(0, zn, 2);
+}
+
+// CHECK-LABEL: @test_svluti4_lane_zt_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[ZN:%.*]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svluti4_lane_zt_s32(svint32_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
+ return svluti4_lane_zt_s32(0, zn, 2);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
new file mode 100644
index 000000000000000..eb3862df246318b
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+void test_svluti2_lane_zt(svuint8_t zn_u8, svuint16_t zn_u16, svuint32_t zn_u32) __arm_streaming __arm_shared_za __arm_preserves_za {
+ // Test Reg Offset
+ svluti2_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti2_lane_zt_u8(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+ // Test Reg Offset
+ svluti2_lane_zt_u16(1, zn_u16, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti2_lane_zt_u16(0, zn_u16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+ // Test Reg Offset
+ svluti2_lane_zt_u32(1, zn_u32, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti2_lane_zt_u32(0, zn_u32, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+}
+void test_svluti4_lane_zt(svuint8_t zn_u8, svuint16_t zn_u16, svuint32_t zn_u32) __arm_streaming __arm_shared_za __arm_preserves_za {
+ // Test Reg Offset
+ svluti4_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti4_lane_zt_u8(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ // Test Reg Offset
+ svluti4_lane_zt_u16(1, zn_u16, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti4_lane_zt_u16(0, zn_u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ // Test Reg Offset
+ svluti4_lane_zt_u32(1, zn_u32, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Test index value range
+ svluti4_lane_zt_u32(0, zn_u32, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1b701a91455c946..096b5764a25cac8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3459,4 +3459,13 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic;
def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic;
+ //
+ // Lookup table expand one register
+ //
+ def int_aarch64_sme_luti2_lane_zt
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
+ def int_aarch64_sme_luti4_lane_zt
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7617dccdeee397f..7d657ea1d0047b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -326,9 +326,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
- template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
+ template <unsigned BaseReg, unsigned Max>
+ bool ImmToTile(SDValue N, SDValue &Imm) {
if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
uint64_t C = CI->getZExtValue();
+
+ if (C > Max)
+ return false;
+
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ed64a7b4984c17c..24ba9dd95004c6f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
Reserved.set(SubReg);
}
+ if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
+ for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
+ SubReg.isValid(); ++SubReg)
+ Reserved.set(*SubReg);
+ }
+
markSuperRegs(Reserved, AArch64::FPCR);
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index bb9464a8d2e1cf2..01a9fefac7eeadb 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -547,11 +547,11 @@ def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>;
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;
-defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2">;
+defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2", int_aarch64_sme_luti2_lane_zt>;
defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">;
defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">;
-defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">;
+defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4", int_aarch64_sme_luti4_lane_zt>;
defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">;
defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 6c9b1f11a4decde..370980ba686c9d6 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
-def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
-def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
-def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
-def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>;
+def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>;
+def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>;
+def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>;
+def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>;
+def imm_to_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>;
def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
@@ -3202,28 +3203,42 @@ class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
string mnemonic>
- : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB, mnemonic> {
+ : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB32b_timm, mnemonic> {
bits<4> i;
let Inst{17-14} = i;
}
-multiclass sme2_luti2_vector_index<string mnemonic> {
+multiclass sme2_luti2_vector_index<string mnemonic, SDPatternOperator intrinsic> {
def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>;
def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;
+
+ def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv8i16:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv8i16:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv4i32:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _S) $zt, nxv4i32:$zn, (i32 VectorIndexB32b_timm:$imm))>;
}
class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
string mnemoni...
[truncated]
|
clang/include/clang/Basic/arm_sme.td
Outdated
// lookup table expand one register | ||
// | ||
let TargetGuard = "sme2" in { | ||
def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "didi", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment as on #73305 that you can't use the default overloaded type for the operand, as it should use svuint8_t instead.
See ARM-software/acle#217 Patch by: Hassnaa Hamdi <[email protected]>
2933674
to
9f0813c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
See ARM-software/acle#217
Patch by: Hassnaa Hamdi [email protected]