From e297899ad909407efb3b37c358a74ec9354af228 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Thu, 7 Nov 2024 11:59:10 +0800
Subject: [PATCH] [RISCV] Allow f16/bf16 with zvfhmin/zvfbfmin as legal
 interleaved access

This is another piece split off from the work to add zvfhmin/zvfbfmin to isLegalElementTypeForRVV.
This is needed to get InterleavedAccessPass to lower [de]interleaves to segment load/stores.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  7 +++-
 .../rvv/fixed-vectors-deinterleave-load.ll    | 39 ++++++++++++++++++-
 .../rvv/fixed-vectors-interleave-store.ll     | 39 ++++++++++++++++++-
 .../RISCV/rvv/vector-deinterleave-load.ll     | 39 ++++++++++++++++++-
 .../RISCV/rvv/vector-interleave-store.ll      | 39 ++++++++++++++++++-
 5 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a625e9d5efeb5..74c1910cffc47 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21512,7 +21512,12 @@ bool RISCVTargetLowering::isLegalInterleavedAccessType(
   if (!isTypeLegal(VT))
     return false;
 
-  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
+  // TODO: Move bf16/f16 support into isLegalElementTypeForRVV
+  if (!(isLegalElementTypeForRVV(VT.getScalarType()) ||
+        (VT.getScalarType() == MVT::bf16 &&
+         Subtarget.hasVInstructionsBF16Minimal()) ||
+        (VT.getScalarType() == MVT::f16 &&
+         Subtarget.hasVInstructionsF16Minimal())) ||
       !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                       Alignment))
     return false;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 1d7496397670f..76720c5641563 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
 
 ; Integers
 
@@ -107,6 +109,28 @@ declare {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64>)
 
 ; Floats
 
+define {<2 x bfloat>, <2 x bfloat>} @vector_deinterleave_load_v2bf16_v4bf16(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_v2bf16_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <4 x bfloat>, ptr %p
+  %retval = call {<2 x bfloat>, <2 x bfloat>} @llvm.vector.deinterleave2.v4bf16(<4 x bfloat> %vec)
+  ret {<2 x bfloat>, <2 x bfloat>} %retval
+}
+
+define {<4 x bfloat>, <4 x bfloat>} @vector_deinterleave_load_v4bf16_v8bf16(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_v4bf16_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <8 x bfloat>, ptr %p
+  %retval = call {<4 x bfloat>, <4 x bfloat>} @llvm.vector.deinterleave2.v8bf16(<8 x bfloat> %vec)
+  ret {<4 x bfloat>, <4 x bfloat>} %retval
+}
+
 define {<2 x half>, <2 x half>} @vector_deinterleave_load_v2f16_v4f16(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_v2f16_v4f16:
 ; CHECK:       # %bb.0:
@@ -140,6 +164,17 @@ define {<2 x float>, <2 x float>} @vector_deinterleave_load_v2f32_v4f32(ptr %p)
   ret {<2 x float>, <2 x float>} %retval
 }
 
+define {<8 x bfloat>, <8 x bfloat>} @vector_deinterleave_load_v8bf16_v16bf16(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_v8bf16_v16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <16 x bfloat>, ptr %p
+  %retval = call {<8 x bfloat>, <8 x bfloat>} @llvm.vector.deinterleave2.v16bf16(<16 x bfloat> %vec)
+  ret {<8 x bfloat>, <8 x bfloat>} %retval
+}
+
 define {<8 x half>, <8 x half>} @vector_deinterleave_load_v8f16_v16f16(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_v8f16_v16f16:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
index 7de9b59c6853f..67c18b5eef736 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
 
 ; Integers
 
@@ -85,6 +87,28 @@ declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)
 
 ; Floats
 
+define void @vector_interleave_store_v4bf16_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_v4bf16_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %res = call <4 x bfloat> @llvm.vector.interleave2.v4bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <4 x bfloat> %res, ptr %p
+  ret void
+}
+
+define void @vector_interleave_store_v8bf16_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_v8bf16_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %res = call <8 x bfloat> @llvm.vector.interleave2.v8bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  store <8 x bfloat> %res, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_v4f16_v2f16(<2 x half> %a, <2 x half> %b, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_v4f16_v2f16:
 ; CHECK:       # %bb.0:
@@ -118,6 +142,17 @@ define void @vector_interleave_store_v4f32_v2f32(<2 x float> %a, <2 x float> %b,
   ret void
 }
 
+define void @vector_interleave_store_v16bf16_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_v16bf16_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %res = call <16 x bfloat> @llvm.vector.interleave2.v16bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  store <16 x bfloat> %res, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_v16f16_v8f16(<8 x half> %a, <8 x half> %b, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_v16f16_v8f16:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 46e93087ab34b..f4c7f0f13e984 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+m | FileCheck --check-prefixes=CHECK,RV32 %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+m | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin,+m | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin,+m | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin,+m | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin,+m | FileCheck --check-prefixes=CHECK,RV64 %s
 
 ; Integers
 
@@ -199,6 +201,28 @@ declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv1
 
 ; Floats
 
+define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_load_nxv2bf16_nxv4bf16(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_nxv2bf16_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 4 x bfloat>, ptr %p
+  %retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
+  ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
+}
+
+define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_load_nxv4bf16_nxv8bf16(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_nxv4bf16_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 8 x bfloat>, ptr %p
+  %retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
+  ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
+}
+
 define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_load_nxv2f16_nxv4f16(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0:
@@ -232,6 +256,17 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_load_nx
   ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
 }
 
+define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_load_nxv8bf16_nxv16bf16(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_nxv8bf16_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 16 x bfloat>, ptr %p
+  %retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
+  ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
+}
+
 define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_load_nxv8f16_nxv16f16(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_nxv8f16_nxv16f16:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 04e1adcb37ca1..16ce25f86462e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck --check-prefixes=CHECK,RV32 %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV64 %s
 
 ; Integers
 
@@ -154,6 +156,28 @@ declare <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64>
 
 ; Floats
 
+define void @vector_interleave_store_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_nxv4bf16_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+  store <vscale x 4 x bfloat> %res, ptr %p
+  ret void
+}
+
+define void @vector_interleave_store_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_nxv8bf16_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+  store <vscale x 8 x bfloat> %res, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_nxv4f16_nxv2f16:
 ; CHECK:       # %bb.0:
@@ -187,6 +211,17 @@ define void @vector_interleave_store_nxv4f32_nxv2f32(<vscale x 2 x float> %a, <v
   ret void
 }
 
+define void @vector_interleave_store_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_nxv16bf16_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+  store <vscale x 16 x bfloat> %res, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_nxv16f16_nxv8f16:
 ; CHECK:       # %bb.0: