-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV][GISel] Add calling convention support for half #94110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-globalisel Author: Yingwei Zheng (dtcxzyw) ChangesFull diff: https://github.com/llvm/llvm-project/pull/94110.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index c18892ac62f24..beee9405de02a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -340,7 +340,7 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
- if (T->isFloatTy() || T->isDoubleTy())
+ if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
return true;
@@ -361,7 +361,7 @@ static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget,
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
- if (T->isFloatTy() || T->isDoubleTy())
+ if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
return true;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index da8daa573b89b..a091380e8ce82 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -849,6 +849,8 @@ const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank(
}
if (RB.getID() == RISCV::FPRBRegBankID) {
+ if (Ty.getSizeInBits() == 16)
+ return &RISCV::FPR16RegClass;
if (Ty.getSizeInBits() == 32)
return &RISCV::FPR32RegClass;
if (Ty.getSizeInBits() == 64)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll
new file mode 100644
index 0000000000000..dd7335362c6bd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+zfh -target-abi ilp32f \
+; RUN: -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32IZFH %s
+
+define half @callee_half_in_regs(half %x) nounwind {
+ ; RV32I-LABEL: name: callee_half_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IZFH-LABEL: name: callee_half_in_regs
+ ; RV32IZFH: bb.1 (%ir-block.0):
+ ; RV32IZFH-NEXT: liveins: $f10_h
+ ; RV32IZFH-NEXT: {{ $}}
+ ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+ ; RV32IZFH-NEXT: $f10_h = COPY [[COPY]](s16)
+ ; RV32IZFH-NEXT: PseudoRET implicit $f10_h
+ ret half %x
+}
+
+define half @callee_half_mixed_with_int(i32 %x0, half %x) nounwind {
+ ; RV32I-LABEL: name: callee_half_mixed_with_int
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IZFH-LABEL: name: callee_half_mixed_with_int
+ ; RV32IZFH: bb.1 (%ir-block.0):
+ ; RV32IZFH-NEXT: liveins: $x10, $f10_h
+ ; RV32IZFH-NEXT: {{ $}}
+ ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h
+ ; RV32IZFH-NEXT: $f10_h = COPY [[COPY1]](s16)
+ ; RV32IZFH-NEXT: PseudoRET implicit $f10_h
+ ret half %x
+}
+
+define half @callee_half_return_stack1(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, half %x) nounwind {
+ ; RV32I-LABEL: name: callee_half_return_stack1
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IZFH-LABEL: name: callee_half_return_stack1
+ ; RV32IZFH: bb.1 (%ir-block.0):
+ ; RV32IZFH-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $f10_h
+ ; RV32IZFH-NEXT: {{ $}}
+ ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32IZFH-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32IZFH-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32IZFH-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32IZFH-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32IZFH-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY $f10_h
+ ; RV32IZFH-NEXT: $f10_h = COPY [[COPY8]](s16)
+ ; RV32IZFH-NEXT: PseudoRET implicit $f10_h
+ ret half %x
+}
+
+define half @callee_half_return_stack2(half %v1, half %v2, half %v3, half %v4, half %v5, half %v6, half %v7, half %v8, half %x) nounwind {
+ ; RV32I-LABEL: name: callee_half_return_stack2
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC8]](s16)
+ ; RV32I-NEXT: $x10 = COPY [[ANYEXT]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV32IZFH-LABEL: name: callee_half_return_stack2
+ ; RV32IZFH: bb.1 (%ir-block.0):
+ ; RV32IZFH-NEXT: liveins: $x10, $f10_h, $f11_h, $f12_h, $f13_h, $f14_h, $f15_h, $f16_h, $f17_h
+ ; RV32IZFH-NEXT: {{ $}}
+ ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+ ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+ ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f12_h
+ ; RV32IZFH-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY $f13_h
+ ; RV32IZFH-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY $f14_h
+ ; RV32IZFH-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY $f15_h
+ ; RV32IZFH-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY $f16_h
+ ; RV32IZFH-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY $f17_h
+ ; RV32IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+ ; RV32IZFH-NEXT: $f10_h = COPY [[TRUNC]](s16)
+ ; RV32IZFH-NEXT: PseudoRET implicit $f10_h
+ ret half %x
+}
|
; RV32I-NEXT: liveins: $x10 | ||
; RV32I-NEXT: {{ $}} | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I cannot read RISC-V, but should this case be rejected? It looks as if it looses precision due to the trunk/anyext pair.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I cannot read RISC-V, but should this case be rejected? It looks as if it looses precision due to the trunk/anyext pair.
half-precision fp type only occupies 16-bits.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nevermind.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am wondering that whether we should handle nan-boxing here.
See https://dtcxzyw.github.io/riscv-isa-manual-host/unpriv-isa-asciidoc.html#nanboxing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SelectionDAG has this nan-boxing code in RISCVTargetLowering::splitValueIntoRegisterParts
. I think its only used when f32 is legal.
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
// Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
// nan, and cast to f32.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
DAG.getConstant(0xFFFF0000, DL, MVT::i32));
Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
Parts[0] = Val;
return true;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it should be handled during the legalization of G_FCONSTANT half. Can we merge this PR first?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test case:
; llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32 -verify-machineinstrs test.ll -o -
declare i32 @callee_half_in_regs(i32 %a, half %b) nounwind
define i32 @caller_half_in_regs() nounwind {
%1 = call i32 @callee_half_in_regs(i32 1, half 2.0)
ret i32 %1
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it should be handled during the legalization of G_FCONSTANT half. Can we merge this PR first?
I don't think that's what SelectionDAG does. It's handled during call handling. splitValueIntoRegisterParts isn't called just for constants.
But having said that, I don't know that SelectionDAG even needs to do it. I'm not sure anything really cares that the value is nan-boxed.
llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll
Outdated
Show resolved
Hide resolved
@@ -340,7 +340,7 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget, | |||
// supported yet. | |||
if (T->isIntegerTy()) | |||
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; | |||
if (T->isFloatTy() || T->isDoubleTy()) | |||
if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still think this whole function should just be dropped and get the default treatment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch adds initial support to the half type on RISC-V. Signed-off-by: Hafidz Muzakky <[email protected]>
Hi @dtcxzyw If you compile with EXPENSIVE_CHECKS on and run the new testcase
Can also be seen if you just add |
Thank you! I will have a look. |
Confirmed. |
Fixes type check failure in #94110 (comment)
Fixes type check failure in llvm#94110 (comment)
No description provided.