Skip to content

[AArch64] Allow single-element vector FP converts with +sme2p2 #112905

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ def HasSMEF16F16orSMEF8F16
// Predicate pairing a code-generation check with an assembler check for NEON.
// Code generation: holds when Subtarget->hasNEON() is true; the predicate
// string contains no separate streaming-mode test.
// Assembler: requires FeatureNEON, reported to the user as "neon".
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// A subset of NEON instructions is legal in Streaming SVE mode only with
// +sme2p2.
// Code generation: holds when Subtarget->isNeonAvailable() is true, or when
// the subtarget has both NEON and SME2p2 (hasNEON() && hasSME2p2()).
// Assembler: requires only FeatureNEON, reported to the user as "neon"; the
// SME2p2 condition applies to the code-generation predicate alone.
def HasNEONandIsSME2p2StreamingSafe
: Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// Code generation: holds when Subtarget->hasRCPC() is true.
// Assembler: requires FeatureRCPC, reported to the user as "rcpc".
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
Expand Down Expand Up @@ -6191,8 +6195,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
let Predicates = [HasNEON] in {
let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
Expand All @@ -6202,8 +6205,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
let Predicates = [HasNEON, HasFullFP16] in {
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
Expand All @@ -6226,8 +6228,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),

// fp16: integer extraction from vector must be at least 32-bits to be legal.
// Actual extraction result is then an in-reg sign-extension of lower 16-bits.
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
let Predicates = [HasNEON, HasFullFP16] in {
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
Expand Down
71 changes: 36 additions & 35 deletions llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS

target triple = "aarch64-unknown-linux-gnu"

Expand All @@ -11,11 +12,11 @@ define double @t1(double %x) {
; CHECK-NEXT: scvtf d0, x8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t1:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs d0, d0
; NON-STREAMING-NEXT: scvtf d0, d0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t1:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
Expand All @@ -29,11 +30,11 @@ define float @t2(float %x) {
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t2:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs s0, s0
; NON-STREAMING-NEXT: scvtf s0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t2:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
Expand All @@ -49,13 +50,13 @@ define half @t3(half %x) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t3:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
; NON-STREAMING-NEXT: fcvtzs s0, s0
; NON-STREAMING-NEXT: scvtf s0, s0
; NON-STREAMING-NEXT: fcvt h0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t3:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
Expand All @@ -69,11 +70,11 @@ define double @t4(double %x) {
; CHECK-NEXT: ucvtf d0, x8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t4:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu d0, d0
; NON-STREAMING-NEXT: ucvtf d0, d0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t4:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0
; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
Expand All @@ -87,11 +88,11 @@ define float @t5(float %x) {
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t5:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu s0, s0
; NON-STREAMING-NEXT: ucvtf s0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t5:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
Expand All @@ -107,13 +108,13 @@ define half @t6(half %x) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t6:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
; NON-STREAMING-NEXT: fcvtzu s0, s0
; NON-STREAMING-NEXT: ucvtf s0, s0
; NON-STREAMING-NEXT: fcvt h0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t6:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
Expand Down
Loading