From 04b18c15f48a0b2b0996132f68e7186a5195986d Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 3 Nov 2023 14:28:46 +0000 Subject: [PATCH 1/5] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA Adds the following SME2 builtins: - svread_hor/ver, - svwrite_hor/ver, - svread_za64, - svwrite_za64 See https://github.com/ARM-software/acle/pull/217 --- clang/include/clang/Basic/arm_sme.td | 45 + .../aarch64-sme2-intrinsics/acle_sme2_read.c | 1422 +++++++++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_write.c | 1097 +++++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 51 + 4 files changed, 2615 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 34dbfff6c4c85..b97629d943334 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -299,6 +299,51 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; +// SME2 - MOVA + +// +// 2 and 4 vector-group read/write intrinsics. +// + +multiclass WriteHV_VG checks> { + let TargetGuard = "sme2" in { + def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>; + } +} + +defm SVWRITE_ZA8 : WriteHV_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; +defm SVWRITE_ZA16 : WriteHV_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; +defm SVWRITE_ZA32 : WriteHV_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; +defm SVWRITE_ZA64 : WriteHV_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; + +multiclass ReadHV_VG checks> { + let TargetGuard = "sme2" in { + def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + } +} + +defm SVREAD_ZA8 : ReadHV_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; +defm SVREAD_ZA16 : ReadHV_VG<"za16_{d}", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>; +defm SVREAD_ZA32 : ReadHV_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; +defm SVREAD_ZA64 : ReadHV_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; + +// +// Single vector-group read/write intrinsics. +// + +let TargetGuard = "sme2" in { + def SVWRITE_ZA64_VG1x2 : Inst<"svwrite_za64[_{d}]_vg1x2", "vm2", "lUld", MergeNone, "aarch64_sme_write_vg1x2", [IsSharedZA, IsStreaming], []>; + def SVWRITE_ZA64_VG1x4 : Inst<"svwrite_za64[_{d}]_vg1x4", "vm4", "lUld", MergeNone, "aarch64_sme_write_vg1x4", [IsSharedZA, IsStreaming], []>; + def SVREAD_ZA64_VG1x2 : Inst<"svread_za64_{d}_vg1x2", "2m", "lUld", MergeNone, "aarch64_sme_read_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>; + def SVREAD_ZA64_VG1x4 : Inst<"svread_za64_{d}_vg1x4", "4m", "lUld", MergeNone, "aarch64_sme_read_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>; +} + // // Outer product and accumulate/subtract // diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c new file mode 100644 index 0000000000000..706403e5180ad --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -0,0 +1,1422 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svread_ver_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) { + return svread_ver_za8_u8_vg2(0, base); +} + +// CHECK-LABEL: @test_svread_ver_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) { + return svread_ver_za8_s8_vg2(0, base); +} + +// CHECK-LABEL: @test_svread_hor_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) { + return svread_hor_za8_u8_vg2(0, base); +} + +// CHECK-LABEL: @test_svread_hor_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) { + return svread_hor_za8_s8_vg2(0, base); +} + +// CHECK-LABEL: @test_svread_hor_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) { + return svread_hor_za8_u8_vg4(0, base); +} + +// CHECK-LABEL: @test_svread_hor_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) { + return svread_hor_za8_s8_vg4(0, base); +} + +// CHECK-LABEL: @test_svread_ver_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) { + return svread_ver_za8_u8_vg4(0, base); +} + +// CHECK-LABEL: @test_svread_ver_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) { + return svread_ver_za8_s8_vg4(0, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) { + return svread_hor_za16_u16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) { + return svread_hor_za16_bf16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) { + return svread_hor_za16_f16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) { + return svread_hor_za16_s16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) { + return svread_ver_za16_u16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) { + return svread_ver_za16_bf16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) { + return svread_ver_za16_f16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) { + return svread_ver_za16_s16_vg2(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) { + return svread_hor_za16_u16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) { + return svread_hor_za16_bf16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) { + return svread_hor_za16_f16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) { + return svread_hor_za16_s16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) { + return svread_ver_za16_u16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) { + return svread_ver_za16_bf16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) { + return svread_ver_za16_f16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_ver_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) { + return svread_ver_za16_s16_vg4(1, base); +} + +// CHECK-LABEL: @test_svread_hor_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) { + return svread_hor_za32_u32_vg2(3, base); +} + +// CHECK-LABEL: @test_svread_hor_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) { + return svread_hor_za32_f32_vg2(3, base); +} + +// CHECK-LABEL: @test_svread_hor_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) { + return svread_hor_za32_s32_vg2(3, base); +} + +// CHECK-LABEL: @test_svread_ver_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) { + return svread_ver_za32_u32_vg2(3, base); +} + +// CHECK-LABEL: @test_svread_ver_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) { + return svread_ver_za32_f32_vg2(3, base); +} + +// CHECK-LABEL: @test_svread_ver_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) { + return svread_ver_za32_s32_vg2(3, base); +} + +// CHECK-LABEL: @test_svread_hor_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) { + return svread_hor_za32_u32_vg4(3, base); +} + +// CHECK-LABEL: @test_svread_hor_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) { + return svread_hor_za32_f32_vg4(3, base); +} + +// CHECK-LABEL: @test_svread_hor_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) { + return svread_hor_za32_s32_vg4(3, base); +} + +// CHECK-LABEL: @test_svread_ver_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) { + return svread_ver_za32_u32_vg4(3, base); +} + +// CHECK-LABEL: @test_svread_ver_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) { + return svread_ver_za32_f32_vg4(3, base); +} + +// CHECK-LABEL: @test_svread_ver_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) { + return svread_ver_za32_s32_vg4(3, base); +} + +// CHECK-LABEL: @test_svread_hor_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) { + return svread_hor_za64_u64_vg2(7, base); +} + +// CHECK-LABEL: @test_svread_hor_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) { + return svread_hor_za64_f64_vg2(7, base); +} + +// CHECK-LABEL: @test_svread_hor_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) { + return svread_hor_za64_s64_vg2(7, base); +} + +// CHECK-LABEL: @test_svread_ver_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) { + return svread_ver_za64_u64_vg2(7, base); +} + +// CHECK-LABEL: @test_svread_ver_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) { + return svread_ver_za64_f64_vg2(7, base); +} + +// CHECK-LABEL: @test_svread_ver_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) { + return svread_ver_za64_s64_vg2(7, base); +} + +// CHECK-LABEL: @test_svread_hor_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) { + return svread_hor_za64_u64_vg4(7, base); +} + +// CHECK-LABEL: @test_svread_hor_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) { + return svread_hor_za64_f64_vg4(7, base); +} + +// CHECK-LABEL: @test_svread_hor_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) { + return svread_hor_za64_s64_vg4(7, base); +} + +// CHECK-LABEL: @test_svread_ver_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) { + return svread_ver_za64_u64_vg4(7, base); +} + +// CHECK-LABEL: @test_svread_ver_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) { + return svread_ver_za64_f64_vg4(7, base); +} + +// CHECK-LABEL: @test_svread_ver_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) { + return svread_ver_za64_s64_vg4(7, base); +} + +// CHECK-LABEL: @test_svread_za64_u64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) { + return svread_za64_u64_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za64_f64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) { + return svread_za64_f64_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za64_s64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) { + return svread_za64_s64_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za64_u64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) { + return svread_za64_u64_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za64_f64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) { + return svread_za64_f64_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za64_s64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) { + return svread_za64_s64_vg1x4(base); +} + diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c new file mode 100644 index 0000000000000..745eb7f3edb56 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c @@ -0,0 +1,1097 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -target-feature +sve -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svwrite_ver_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_u8_vg2j11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg2,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_s8_vg2j10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg2,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_u8_vg2j11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg2,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_s8_vg2j10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg2,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_u8_vg4j11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg4,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_s8_vg4j10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg4,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_u8_vg4j11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg4,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_s8_vg4j10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg4,)(0, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_u16_vg2j12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_hor_za16_bf16_vg2j14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_f16_vg2j13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_s16_vg2j11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_u16_vg2j12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_ver_za16_bf16_vg2j14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_f16_vg2j13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_s16_vg2j11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg2,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_u16_vg4j12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_hor_za16_bf16_vg4j14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_f16_vg4j13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_s16_vg4j11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_u16_vg4j12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_ver_za16_bf16_vg4j14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_f16_vg4j13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_s16_vg4j11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg4,)(1, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_u32_vg2j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg2,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_f32_vg2j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg2,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_s32_vg2j11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg2,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_u32_vg2j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg2,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_f32_vg2j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg2,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_s32_vg2j11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg2,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_u32_vg4j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg4,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_f32_vg4j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg4,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_s32_vg4j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg4,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_u32_vg4j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg4,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_f32_vg4j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg4,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_s32_vg4j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg4,)(3, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_u64_vg2j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg2,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_f64_vg2j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg2,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_s64_vg2j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg2,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_u64_vg2j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg2,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_f64_vg2j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg2,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_s64_vg2j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg2,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_u64_vg4j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg4,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_u64_vg4j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg4,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_f64_vg4j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg4,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_s64_vg4j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg4,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_f64_vg4j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg4,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_s64_vg4j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg4,)(7, base, val); +} + +// CHECK-LABEL: @test_svwrite_za64_u64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_u64_vg1x2j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) { + SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za64_f64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_f64_vg1x2j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) { + SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za64_s64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_s64_vg1x2j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) { + SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za64_u64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_u64_vg1x4j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) { + SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za64_f64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_f64_vg1x4j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) { + SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za64_s64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_s64_vg1x4j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) { + SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x4,)(base, val); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 70987ad395f73..77b1133f55101 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -5,6 +5,57 @@ #include +void test_multivector_read(uint32_t base) { + + // Test Tile Range + svread_hor_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svread_ver_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svread_hor_za8_u8_vg4(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svread_ver_za8_u8_vg4(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + + svread_hor_za16_u16_vg2(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svread_ver_za16_u16_vg2(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svread_hor_za16_u16_vg4(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svread_ver_za16_u16_vg4(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + + svread_hor_za32_u32_vg2(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svread_ver_za32_u32_vg2(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svread_hor_za32_u32_vg4(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svread_ver_za32_u32_vg4(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + + svread_hor_za64_u64_vg2(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svread_ver_za64_u64_vg2(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svread_hor_za64_u64_vg4(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svread_ver_za64_u64_vg4(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +} + +void test_multivector_write(uint32_t base, svuint8x2_t v8x2, svuint8x4_t v8x4, + svuint16x2_t v16x2, svuint16x4_t v16x4, + svuint32x2_t v32x2, svuint32x4_t v32x4, + svuint64x2_t v64x2, svuint64x4_t v64x4) { + + // Test Tile Range + svwrite_hor_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svwrite_ver_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svwrite_hor_za8_u8_vg4(1, base, v8x4); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svwrite_ver_za8_u8_vg4(1, base, v8x4); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + + svwrite_hor_za16_u16_vg2(2, base, v16x2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svwrite_ver_za16_u16_vg2(2, base, v16x2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svwrite_hor_za16_u16_vg4(2, base, v16x4); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svwrite_ver_za16_u16_vg4(2, base, v16x4); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + + svwrite_hor_za32_u32_vg2(4, base, v32x2); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svwrite_ver_za32_u32_vg2(4, base, v32x2); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svwrite_hor_za32_u32_vg4(4, base, v32x4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svwrite_ver_za32_u32_vg4(4, base, v32x4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + + svwrite_hor_za64_u64_vg2(8, base, v64x2); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svwrite_ver_za64_u64_vg2(8, base, v64x2); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svwrite_hor_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svwrite_ver_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +} + void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_shared_za { // Test Tile Range svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} From c369e641b58328fdde662d7b97557855a5ded3e5 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 13 Nov 2023 10:39:29 +0000 Subject: [PATCH 2/5] - Add arm_streaming arm_shared_za & arm_preserves_za attributes to tests --- .../aarch64-sme2-intrinsics/acle_sme2_read.c | 108 +++++++++--------- .../aarch64-sme2-intrinsics/acle_sme2_write.c | 108 +++++++++--------- .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 4 +- 3 files changed, 110 insertions(+), 110 deletions(-) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c index 706403e5180ad..89700afca41e4 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -34,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) { +svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za8_u8_vg2(0, base); } @@ -56,7 +56,7 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) { +svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za8_s8_vg2(0, base); } @@ -78,7 +78,7 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) { +svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za8_u8_vg2(0, base); } @@ -100,7 +100,7 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) { +svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za8_s8_vg2(0, base); } @@ -130,7 +130,7 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) { +svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za8_u8_vg4(0, base); } @@ -160,7 +160,7 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) { +svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za8_s8_vg4(0, base); } @@ -190,7 +190,7 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) { +svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za8_u8_vg4(0, base); } @@ -220,7 +220,7 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) { +svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za8_s8_vg4(0, base); } @@ -242,7 +242,7 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) { +svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_u16_vg2(1, base); } @@ -264,7 +264,7 @@ svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) { +svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_bf16_vg2(1, base); } @@ -286,7 +286,7 @@ svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) { +svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_f16_vg2(1, base); } @@ -308,7 +308,7 @@ svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) { +svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_s16_vg2(1, base); } @@ -330,7 +330,7 @@ svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) { +svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_u16_vg2(1, base); } @@ -352,7 +352,7 @@ svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) { +svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_bf16_vg2(1, base); } @@ -374,7 +374,7 @@ svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) { +svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_f16_vg2(1, base); } @@ -396,7 +396,7 @@ svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) { +svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_s16_vg2(1, base); } @@ -426,7 +426,7 @@ svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) { +svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_u16_vg4(1, base); } @@ -456,7 +456,7 @@ svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) { +svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_bf16_vg4(1, base); } @@ -486,7 +486,7 @@ svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) { +svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_f16_vg4(1, base); } @@ -516,7 +516,7 @@ svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) { +svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za16_s16_vg4(1, base); } @@ -546,7 +546,7 @@ svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) { +svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_u16_vg4(1, base); } @@ -576,7 +576,7 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) { +svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_bf16_vg4(1, base); } @@ -606,7 +606,7 @@ svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) { +svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_f16_vg4(1, base); } @@ -636,7 +636,7 @@ svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) { +svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za16_s16_vg4(1, base); } @@ -658,7 +658,7 @@ svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) { +svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za32_u32_vg2(3, base); } @@ -680,7 +680,7 @@ svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) { +svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za32_f32_vg2(3, base); } @@ -702,7 +702,7 @@ svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) { +svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za32_s32_vg2(3, base); } @@ -724,7 +724,7 @@ svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) { +svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za32_u32_vg2(3, base); } @@ -746,7 +746,7 @@ svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) { +svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za32_f32_vg2(3, base); } @@ -768,7 +768,7 @@ svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) { +svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za32_s32_vg2(3, base); } @@ -798,7 +798,7 @@ svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) { +svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za32_u32_vg4(3, base); } @@ -828,7 +828,7 @@ svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) { +svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za32_f32_vg4(3, base); } @@ -858,7 +858,7 @@ svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) { +svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za32_s32_vg4(3, base); } @@ -888,7 +888,7 @@ svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) { +svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za32_u32_vg4(3, base); } @@ -918,7 +918,7 @@ svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) { +svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za32_f32_vg4(3, base); } @@ -948,7 +948,7 @@ svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) { +svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za32_s32_vg4(3, base); } @@ -970,7 +970,7 @@ svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) { +svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za64_u64_vg2(7, base); } @@ -992,7 +992,7 @@ svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) { +svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za64_f64_vg2(7, base); } @@ -1014,7 +1014,7 @@ svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) { +svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za64_s64_vg2(7, base); } @@ -1036,7 +1036,7 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) { +svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za64_u64_vg2(7, base); } @@ -1058,7 +1058,7 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) { +svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za64_f64_vg2(7, base); } @@ -1080,7 +1080,7 @@ svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) { +svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za64_s64_vg2(7, base); } @@ -1110,7 +1110,7 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) { +svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za64_u64_vg4(7, base); } @@ -1140,7 +1140,7 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) { +svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za64_f64_vg4(7, base); } @@ -1170,7 +1170,7 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) { +svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_hor_za64_s64_vg4(7, base); } @@ -1200,7 +1200,7 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) { +svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za64_u64_vg4(7, base); } @@ -1230,7 +1230,7 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) { +svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za64_f64_vg4(7, base); } @@ -1260,7 +1260,7 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) { +svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_ver_za64_s64_vg4(7, base); } @@ -1282,7 +1282,7 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) { +svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_u64_vg1x2(base); } @@ -1304,7 +1304,7 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) { +svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_f64_vg1x2(base); } @@ -1326,7 +1326,7 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) { +svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_s64_vg1x2(base); } @@ -1356,7 +1356,7 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) { +svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_u64_vg1x4(base); } @@ -1386,7 +1386,7 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) { +svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_f64_vg1x4(base); } @@ -1416,7 +1416,7 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) { // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) { +svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_s64_vg1x4(base); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c index 745eb7f3edb56..99aa221834724 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c @@ -30,7 +30,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) { +void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg2,)(0, base, val); } @@ -48,7 +48,7 @@ void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) { +void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg2,)(0, base, val); } @@ -66,7 +66,7 @@ void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) { +void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg2,)(0, base, val); } @@ -84,7 +84,7 @@ void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) { +void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg2,)(0, base, val); } @@ -106,7 +106,7 @@ void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) { +void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg4,)(0, base, val); } @@ -128,7 +128,7 @@ void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) { +void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg4,)(0, base, val); } @@ -150,7 +150,7 @@ void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) { +void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg4,)(0, base, val); } @@ -172,7 +172,7 @@ void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) { +void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg4,)(0, base, val); } @@ -190,7 +190,7 @@ void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) { +void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg2,)(1, base, val); } @@ -208,7 +208,7 @@ void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { +void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg2,)(1, base, val); } @@ -226,7 +226,7 @@ void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { +void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg2,)(1, base, val); } @@ -244,7 +244,7 @@ void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) { +void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg2,)(1, base, val); } @@ -262,7 +262,7 @@ void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) { +void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg2,)(1, base, val); } @@ -280,7 +280,7 @@ void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { +void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg2,)(1, base, val); } @@ -298,7 +298,7 @@ void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { +void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg2,)(1, base, val); } @@ -316,7 +316,7 @@ void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) { +void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg2,)(1, base, val); } @@ -338,7 +338,7 @@ void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) { +void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg4,)(1, base, val); } @@ -360,7 +360,7 @@ void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { +void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg4,)(1, base, val); } @@ -382,7 +382,7 @@ void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { +void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg4,)(1, base, val); } @@ -404,7 +404,7 @@ void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) { +void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg4,)(1, base, val); } @@ -426,7 +426,7 @@ void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) { +void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg4,)(1, base, val); } @@ -448,7 +448,7 @@ void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { +void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg4,)(1, base, val); } @@ -470,7 +470,7 @@ void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { +void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg4,)(1, base, val); } @@ -492,7 +492,7 @@ void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) { +void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg4,)(1, base, val); } @@ -510,7 +510,7 @@ void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) { +void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg2,)(3, base, val); } @@ -528,7 +528,7 @@ void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { +void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg2,)(3, base, val); } @@ -546,7 +546,7 @@ void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) { +void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg2,)(3, base, val); } @@ -564,7 +564,7 @@ void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) { +void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg2,)(3, base, val); } @@ -582,7 +582,7 @@ void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { +void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg2,)(3, base, val); } @@ -600,7 +600,7 @@ void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) { +void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg2,)(3, base, val); } @@ -622,7 +622,7 @@ void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) { +void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg4,)(3, base, val); } @@ -644,7 +644,7 @@ void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { +void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg4,)(3, base, val); } @@ -666,7 +666,7 @@ void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) { +void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg4,)(3, base, val); } @@ -688,7 +688,7 @@ void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) { +void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg4,)(3, base, val); } @@ -710,7 +710,7 @@ void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { +void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg4,)(3, base, val); } @@ -732,7 +732,7 @@ void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) { +void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg4,)(3, base, val); } @@ -750,7 +750,7 @@ void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) { +void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg2,)(7, base, val); } @@ -768,7 +768,7 @@ void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { +void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg2,)(7, base, val); } @@ -786,7 +786,7 @@ void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) { +void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg2,)(7, base, val); } @@ -804,7 +804,7 @@ void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) { +void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg2,)(7, base, val); } @@ -822,7 +822,7 @@ void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { +void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg2,)(7, base, val); } @@ -840,7 +840,7 @@ void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) { +void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg2,)(7, base, val); } @@ -862,7 +862,7 @@ void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) { +void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg4,)(7, base, val); } @@ -884,7 +884,7 @@ void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) { +void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg4,)(7, base, val); } @@ -906,7 +906,7 @@ void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { +void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg4,)(7, base, val); } @@ -928,7 +928,7 @@ void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) { +void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg4,)(7, base, val); } @@ -950,7 +950,7 @@ void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { +void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg4,)(7, base, val); } @@ -972,7 +972,7 @@ void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) { +void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg4,)(7, base, val); } @@ -990,7 +990,7 @@ void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) { +void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x2,)(base, val); } @@ -1008,7 +1008,7 @@ void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) { +void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x2,)(base, val); } @@ -1026,7 +1026,7 @@ void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) { +void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x2,)(base, val); } @@ -1048,7 +1048,7 @@ void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) { +void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x4,)(base, val); } @@ -1070,7 +1070,7 @@ void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) { +void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x4,)(base, val); } @@ -1092,6 +1092,6 @@ void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) { +void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x4,)(base, val); } diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 77b1133f55101..63a7a40c3273a 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -5,7 +5,7 @@ #include -void test_multivector_read(uint32_t base) { +void test_multivector_read(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { // Test Tile Range svread_hor_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} @@ -32,7 +32,7 @@ void test_multivector_read(uint32_t base) { void test_multivector_write(uint32_t base, svuint8x2_t v8x2, svuint8x4_t v8x4, svuint16x2_t v16x2, svuint16x4_t v16x4, svuint32x2_t v32x2, svuint32x4_t v32x4, - svuint64x2_t v64x2, svuint64x4_t v64x4) { + svuint64x2_t v64x2, svuint64x4_t v64x4) __arm_streaming __arm_shared_za { // Test Tile Range svwrite_hor_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} From 884f9e91dc8e3f674c5f8fb5ed5a7ae1e6e01bb4 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Tue, 5 Dec 2023 13:51:24 +0000 Subject: [PATCH 3/5] - Added overloaded types for svread & svwrite. (See https://github.com/ARM-software/acle/pull/278) --- clang/include/clang/Basic/arm_sme.td | 57 +-- .../aarch64-sme2-intrinsics/acle_sme2_read.c | 469 +++++++++++++++++- .../aarch64-sme2-intrinsics/acle_sme2_write.c | 360 ++++++++++++++ llvm/lib/Target/AArch64/SMEInstrFormats.td | 20 +- .../AArch64/sme2-intrinsics-extract-mova.ll | 168 +++++++ .../AArch64/sme2-intrinsics-insert-mova.ll | 204 ++++++++ 6 files changed, 1241 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b97629d943334..a46e2bdb34c6f 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -302,46 +302,39 @@ defm SVSUB : ZAAddSub<"sub">; // SME2 - MOVA // -// 2 and 4 vector-group read/write intrinsics. +// Single, 2 and 4 vector-group read/write intrinsics. // -multiclass WriteHV_VG checks> { - let TargetGuard = "sme2" in { - def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>; - } +multiclass ZAWrite_VG checks> { + def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG1x2 : Inst<"svwrite_" # n # "_vg1x2", "vm2", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsStreaming], []>; + def NAME # _VG1x4 : Inst<"svwrite_" # n # "_vg1x4", "vm4", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsStreaming], []>; } -defm SVWRITE_ZA8 : WriteHV_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; -defm SVWRITE_ZA16 : WriteHV_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; -defm SVWRITE_ZA32 : WriteHV_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; -defm SVWRITE_ZA64 : WriteHV_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; - -multiclass ReadHV_VG checks> { - let TargetGuard = "sme2" in { - def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - } +let TargetGuard = "sme2" in { + defm SVWRITE_ZA8 : ZAWrite_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; + defm SVWRITE_ZA16 : ZAWrite_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; + defm SVWRITE_ZA32 : ZAWrite_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; + defm SVWRITE_ZA64 : ZAWrite_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; } -defm SVREAD_ZA8 : ReadHV_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; -defm SVREAD_ZA16 : ReadHV_VG<"za16_{d}", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>; -defm SVREAD_ZA32 : ReadHV_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; -defm SVREAD_ZA64 : ReadHV_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; - -// -// Single vector-group read/write intrinsics. -// +multiclass ZARead_VG checks> { + def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG1x2 : Inst<"svread_" # n # "_vg1x2", "2m", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>; + def NAME # _VG1x4 : Inst<"svread_" # n # "_vg1x4", "4m", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>; +} let TargetGuard = "sme2" in { - def SVWRITE_ZA64_VG1x2 : Inst<"svwrite_za64[_{d}]_vg1x2", "vm2", "lUld", MergeNone, "aarch64_sme_write_vg1x2", [IsSharedZA, IsStreaming], []>; - def SVWRITE_ZA64_VG1x4 : Inst<"svwrite_za64[_{d}]_vg1x4", "vm4", "lUld", MergeNone, "aarch64_sme_write_vg1x4", [IsSharedZA, IsStreaming], []>; - def SVREAD_ZA64_VG1x2 : Inst<"svread_za64_{d}_vg1x2", "2m", "lUld", MergeNone, "aarch64_sme_read_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>; - def SVREAD_ZA64_VG1x4 : Inst<"svread_za64_{d}_vg1x4", "4m", "lUld", MergeNone, "aarch64_sme_read_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>; + defm SVREAD_ZA8 : ZARead_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; + defm SVREAD_ZA16 : ZARead_VG<"za16_{d}", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>; + defm SVREAD_ZA32 : ZARead_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; + defm SVREAD_ZA64 : ZARead_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; } // diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c index 89700afca41e4..63abc9acfdf9c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -1264,6 +1264,204 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_sh return svread_ver_za64_s64_vg4(7, base); } +// CHECK-LABEL: @test_svread_za8_s8_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za8_s8_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za8_u8_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za8_u8_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za16_s16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_s16_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za16_u16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_u16_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za16_bf16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_bf16_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za16_f16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_f16_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za32_s32_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za32_s32_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za32_u32_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za32_u32_vg1x2(base); +} + +// CHECK-LABEL: @test_svread_za32_f32_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za32_f32_vg1x2(base); +} + // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) @@ -1330,6 +1528,276 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_shar return svread_za64_s64_vg1x2(base); } +// CHECK-LABEL: @test_svread_za8_s8_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za8_s8_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za8_u8_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za8_u8_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za16_s16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_s16_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za16_u16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_u16_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za16_bf16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_bf16_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za16_f16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za16_f16_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za32_s32_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za32_s32_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za32_u32_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za32_u32_vg1x4(base); +} + +// CHECK-LABEL: @test_svread_za32_f32_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { + return svread_za32_f32_vg1x4(base); +} + // CHECK-LABEL: @test_svread_za64_u64_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) @@ -1419,4 +1887,3 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_sh svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { return svread_za64_s64_vg1x4(base); } - diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c index 99aa221834724..14b0371bce574 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c @@ -976,6 +976,168 @@ void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streami SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg4,)(7, base, val); } +// CHECK-LABEL: @test_svwrite_za8_s8_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_za8_s8_vg1x2j10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za8_s8_vg1x2(uint32_t base, svint8x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za8,_s8,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za8_u8_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_za8_u8_vg1x2j11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za8_u8_vg1x2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za8,_u8,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_s16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za16_s16_vg1x2j11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_s16_vg1x2(uint32_t base, svint16x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_s16,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_u16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za16_u16_vg1x2j12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_u16_vg1x2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_u16,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_bf16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_za16_bf16_vg1x2j14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_bf16_vg1x2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_bf16,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_f16_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za16_f16_vg1x2j13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_f16_vg1x2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_f16,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za32_s32_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za32_s32_vg1x2j11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za32_s32_vg1x2(uint32_t base, svint32x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za32,_s32,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za32_u32_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za32_u32_vg1x2j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za32_u32_vg1x2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za32,_u32,_vg1x2,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za32_f32_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za32_f32_vg1x2j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za32_f32_vg1x2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za32,_f32,_vg1x2,)(base, val); +} + // CHECK-LABEL: @test_svwrite_za64_u64_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) @@ -1030,6 +1192,204 @@ void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x2,)(base, val); } +// CHECK-LABEL: @test_svwrite_za8_s8_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_za8_s8_vg1x4j10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za8_s8_vg1x4(uint32_t base, svint8x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za8,_s8,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za8_u8_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_za8_u8_vg1x4j11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za8_u8_vg1x4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za8,_u8,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_s16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za16_s16_vg1x4j11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_s16_vg1x4(uint32_t base, svint16x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_s16,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_u16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za16_u16_vg1x4j12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_u16_vg1x4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_u16,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_bf16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_za16_bf16_vg1x4j14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_bf16_vg1x4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_bf16,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za16_f16_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za16_f16_vg1x4j13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za16_f16_vg1x4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za16,_f16,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za32_s32_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za32_s32_vg1x4j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za32_s32_vg1x4(uint32_t base, svint32x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za32,_s32,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za32_u32_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za32_u32_vg1x4j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za32_u32_vg1x4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za32,_u32,_vg1x4,)(base, val); +} + +// CHECK-LABEL: @test_svwrite_za32_f32_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za32_f32_vg1x4j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za32_f32_vg1x4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svwrite_za32,_f32,_vg1x4,)(base, val); +} + // CHECK-LABEL: @test_svwrite_za64_u64_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 408b897070af0..46353c60fe851 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3686,8 +3686,14 @@ multiclass sme2_mova_vec_to_array_vg2_multi; - def : SME2_ZA_VG1x2_Multi_Pat; - def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, @@ -3779,8 +3785,14 @@ multiclass sme2_mova_vec_to_array_vg4_multi; - def : SME2_ZA_VG1x4_Multi_Pat; - def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll index ad1c524d5610e..1e1895fc54c2a 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll @@ -399,6 +399,84 @@ define { , , , , } @za_read_vg1x2_b(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 %slice.7) + ret { , } %res2 +} + +define { , } @za_read_vg1x2_h(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 %slice.7) + ret { , } %res2 +} + +define { , } @za_read_vg1x2_f16(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 %slice.7) + ret { , } %res2 +} + +define { , } @za_read_vg1x2_bf16(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 %slice.7) + ret { , } %res2 +} + +define { , } @za_read_vg1x2_s(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 %slice.7) + ret { , } %res2 +} + +define { , } @za_read_vg1x2_f32(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 %slice.7) + ret { , } %res2 +} + define { , } @za_read_vg1x2_d(i32 %slice) { ; CHECK-LABEL: za_read_vg1x2_d: ; CHECK: // %bb.0: @@ -427,6 +505,84 @@ define { , } @za_read_vg1x2_f64(i32 % ; Move Multi-Vector From ZA (Read) x4 +define { , , , } @za_read_vg1x4_b(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 %slice.7) + ret { , , , } %res2 +} + +define { , , , } @za_read_vg1x4_h(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 %slice.7) + ret { , , , } %res2 +} + +define { , , , } @za_read_vg1x4_f16(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 %slice.7) + ret { , , , } %res2 +} + +define { , , , } @za_read_vg1x4_bf16(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 %slice.7) + ret { , , , } %res2 +} + +define { , , , } @za_read_vg1x4_s(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 %slice.7) + ret { , , , } %res2 +} + +define { , , , } @za_read_vg1x4_f32(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 %slice.7) + ret { , , , } %res2 +} + define { , , , } @za_read_vg1x4_d(i32 %slice) { ; CHECK-LABEL: za_read_vg1x4_d: ; CHECK: // %bb.0: @@ -489,8 +645,20 @@ declare { , , , , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32, i32) declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32, i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32) declare { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32) declare { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32) declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32) declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll index 2712cbc1d7efd..99de6f832a3c9 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll @@ -497,6 +497,96 @@ define void @za_write_vg4_vert_f64(i32 %slice, %zn1, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_b: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 %slice, %za1, %za2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 %slice.7, %za1, %za2) + ret void +} + +define void @za_write_vg1x2_h(i32 %slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_h: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 %slice, %za1, %za2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 %slice.7, %za1, %za2) + ret void +} + +define void @za_write_vg1x2_f16(i32 %slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 %slice, %za1, %za2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 %slice.7, %za1, %za2) + ret void +} + +define void @za_write_vg1x2_bf16(i32 %slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 %slice, %za1, %za2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 %slice.7, %za1, %za2) + ret void +} + +define void @za_write_vg1x2_s(i32 %slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 %slice, %za1, %za2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 %slice.7, %za1, %za2) + ret void +} + +define void @za_write_vg1x2_f32(i32 %slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 %slice, %za1, %za2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 %slice.7, %za1, %za2) + ret void +} + define void @za_write_vg1x2_d(i32 %slice, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_d: ; CHECK: // %bb.0: @@ -531,6 +621,108 @@ define void @za_write_vg1x2_f64(i32 %slice, %za1, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_b: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 %slice, %za1, %za2, %za3, %za4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 %slice.7, %za1, %za2, %za3, %za4) + ret void +} + +define void @za_write_vg1x4_h(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_h: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 %slice, %za1, %za2, %za3, %za4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 %slice.7, %za1, %za2, %za3, %za4) + ret void +} + +define void @za_write_vg1x4_f16(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 %slice, %za1, %za2, %za3, %za4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 %slice.7, %za1, %za2, %za3, %za4) + ret void +} + +define void @za_write_vg1x4_bf16(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 %slice, %za1, %za2, %za3, %za4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 %slice.7, %za1, %za2, %za3, %za4) + ret void +} + +define void @za_write_vg1x4_s(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 %slice, %za1, %za2, %za3, %za4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 %slice.7, %za1, %za2, %za3, %za4) + ret void +} + +define void @za_write_vg1x4_f32(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 %slice, %za1, %za2, %za3, %za4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 %slice.7, %za1, %za2, %za3, %za4) + ret void +} + define void @za_write_vg1x4_d(i32 %slice, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_d: ; CHECK: // %bb.0: @@ -601,8 +793,20 @@ declare void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32, i32, , , , ) declare void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32, i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32, , ) declare void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32, , ) declare void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32, , , , ) declare void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32, , , , ) declare void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32, , , , ) From dfe59ed4ce77fbc563feba1bda3b05e90ae83cdb Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Tue, 5 Dec 2023 16:34:04 +0000 Subject: [PATCH 4/5] - Remove unused SVE_ACLE_FUNC & SVE_OVERLOADED_FORMS run lines from acle_sme2_read.c --- .../CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c index 63abc9acfdf9c..583a7fc815472 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -3,19 +3,10 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - // CHECK-LABEL: @test_svread_ver_za8_u8_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) From b4d97a01bdfd6fa8a8a6d8180c0d6389d5538869 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 15 Dec 2023 13:53:09 +0000 Subject: [PATCH 5/5] - Moved more of the builtin name into the read & write multiclasses --- clang/include/clang/Basic/arm_sme.td | 40 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index a46e2bdb34c6f..ecaa59a898829 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -306,35 +306,35 @@ defm SVSUB : ZAAddSub<"sub">; // multiclass ZAWrite_VG checks> { - def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>; - def NAME # _VG1x2 : Inst<"svwrite_" # n # "_vg1x2", "vm2", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsStreaming], []>; - def NAME # _VG1x4 : Inst<"svwrite_" # n # "_vg1x4", "vm4", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsStreaming], []>; + def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "[_{d}]_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "[_{d}]_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "[_{d}]_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "[_{d}]_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>; + def NAME # _VG1x2 : Inst<"svwrite_" # n # "[_{d}]_vg1x2", "vm2", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsStreaming], []>; + def NAME # _VG1x4 : Inst<"svwrite_" # n # "[_{d}]_vg1x4", "vm4", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsStreaming], []>; } let TargetGuard = "sme2" in { - defm SVWRITE_ZA8 : ZAWrite_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; - defm SVWRITE_ZA16 : ZAWrite_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; - defm SVWRITE_ZA32 : ZAWrite_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; - defm SVWRITE_ZA64 : ZAWrite_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; + defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; + defm SVWRITE_ZA16 : ZAWrite_VG<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; + defm SVWRITE_ZA32 : ZAWrite_VG<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; + defm SVWRITE_ZA64 : ZAWrite_VG<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; } multiclass ZARead_VG checks> { - def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; - def NAME # _VG1x2 : Inst<"svread_" # n # "_vg1x2", "2m", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>; - def NAME # _VG1x4 : Inst<"svread_" # n # "_vg1x4", "4m", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>; + def NAME # _VG2_H : Inst<"svread_hor_" # n # "_{d}_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG2_V : Inst<"svread_ver_" # n # "_{d}_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG4_H : Inst<"svread_hor_" # n # "_{d}_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG4_V : Inst<"svread_ver_" # n # "_{d}_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + def NAME # _VG1x2 : Inst<"svread_" # n # "_{d}_vg1x2", "2m", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>; + def NAME # _VG1x4 : Inst<"svread_" # n # "_{d}_vg1x4", "4m", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>; } let TargetGuard = "sme2" in { - defm SVREAD_ZA8 : ZARead_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; - defm SVREAD_ZA16 : ZARead_VG<"za16_{d}", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>; - defm SVREAD_ZA32 : ZARead_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; - defm SVREAD_ZA64 : ZARead_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; + defm SVREAD_ZA8 : ZARead_VG<"za8", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; + defm SVREAD_ZA16 : ZARead_VG<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>; + defm SVREAD_ZA32 : ZARead_VG<"za32", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; + defm SVREAD_ZA64 : ZARead_VG<"za64", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; } //