Skip to content

Commit a636c23

Browse files
committed
[PowerPC] Change half to use soft promotion rather than PromoteFloat
On PowerPC targets, `half` uses the default legalization of promoting to an `f32`. However, this has some fundamental issues related to the inability to round-trip values. Resolve this by switching to the soft legalization, which passes `f16` as an `i16`. The PowerPC ABI Specification does not define a `_Float16` type, so the calling convention changes are acceptable. Fixes the PowerPC portion of [1]. A similar change was done for MIPS in f0231b6 ("[MIPS] Use softPromoteHalf legalization for fp16 rather than PromoteFloat (#110199)") and for LoongArch in 13280d9 ("[loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791)"). [1]: #97975
1 parent 43e0891 commit a636c23

File tree

14 files changed

+3315
-5666
lines changed

14 files changed

+3315
-5666
lines changed

llvm/docs/ReleaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ Changes to the MIPS Backend
113113
Changes to the PowerPC Backend
114114
------------------------------
115115

116+
* `half` now uses a soft float ABI, which works correctly in more cases.
117+
116118
Changes to the RISC-V Backend
117119
-----------------------------
118120

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
843843
SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
844844
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
845845
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
846-
SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N);
846+
SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
847847
SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
848848
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
849849
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,8 @@ namespace llvm {
790790

791791
bool useSoftFloat() const override;
792792

793+
bool softPromoteHalfType() const override { return true; }
794+
793795
bool hasSPE() const;
794796

795797
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {

llvm/test/CodeGen/Generic/half.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@
2929
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
3030
; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
3131
; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %}
32-
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
33-
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
34-
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %}
32+
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
33+
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
34+
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
3535
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
3636
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
3737
; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}

llvm/test/CodeGen/PowerPC/atomics.ll

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -469,39 +469,20 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
469469
define half @load_atomic_f16__seq_cst(ptr %ptr) {
470470
; PPC32-LABEL: load_atomic_f16__seq_cst:
471471
; PPC32: # %bb.0:
472-
; PPC32-NEXT: mflr r0
473-
; PPC32-NEXT: stwu r1, -16(r1)
474-
; PPC32-NEXT: stw r0, 20(r1)
475-
; PPC32-NEXT: .cfi_def_cfa_offset 16
476-
; PPC32-NEXT: .cfi_offset lr, 4
477472
; PPC32-NEXT: sync
478473
; PPC32-NEXT: lhz r3, 0(r3)
479474
; PPC32-NEXT: cmpw cr7, r3, r3
480475
; PPC32-NEXT: bne- cr7, .+4
481476
; PPC32-NEXT: isync
482-
; PPC32-NEXT: bl __extendhfsf2
483-
; PPC32-NEXT: lwz r0, 20(r1)
484-
; PPC32-NEXT: addi r1, r1, 16
485-
; PPC32-NEXT: mtlr r0
486477
; PPC32-NEXT: blr
487478
;
488479
; PPC64-LABEL: load_atomic_f16__seq_cst:
489480
; PPC64: # %bb.0:
490-
; PPC64-NEXT: mflr r0
491-
; PPC64-NEXT: stdu r1, -112(r1)
492-
; PPC64-NEXT: std r0, 128(r1)
493-
; PPC64-NEXT: .cfi_def_cfa_offset 112
494-
; PPC64-NEXT: .cfi_offset lr, 16
495481
; PPC64-NEXT: sync
496482
; PPC64-NEXT: lhz r3, 0(r3)
497483
; PPC64-NEXT: cmpd cr7, r3, r3
498484
; PPC64-NEXT: bne- cr7, .+4
499485
; PPC64-NEXT: isync
500-
; PPC64-NEXT: bl __extendhfsf2
501-
; PPC64-NEXT: nop
502-
; PPC64-NEXT: addi r1, r1, 112
503-
; PPC64-NEXT: ld r0, 16(r1)
504-
; PPC64-NEXT: mtlr r0
505486
; PPC64-NEXT: blr
506487
%val = load atomic half, ptr %ptr seq_cst, align 2
507488
ret half %val
@@ -575,44 +556,11 @@ define double @load_atomic_f64__seq_cst(ptr %ptr) {
575556
}
576557

577558
define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
578-
; PPC32-LABEL: store_atomic_f16__seq_cst:
579-
; PPC32: # %bb.0:
580-
; PPC32-NEXT: mflr r0
581-
; PPC32-NEXT: stwu r1, -16(r1)
582-
; PPC32-NEXT: stw r0, 20(r1)
583-
; PPC32-NEXT: .cfi_def_cfa_offset 16
584-
; PPC32-NEXT: .cfi_offset lr, 4
585-
; PPC32-NEXT: .cfi_offset r30, -8
586-
; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
587-
; PPC32-NEXT: mr r30, r3
588-
; PPC32-NEXT: bl __truncsfhf2
589-
; PPC32-NEXT: sync
590-
; PPC32-NEXT: sth r3, 0(r30)
591-
; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
592-
; PPC32-NEXT: lwz r0, 20(r1)
593-
; PPC32-NEXT: addi r1, r1, 16
594-
; PPC32-NEXT: mtlr r0
595-
; PPC32-NEXT: blr
596-
;
597-
; PPC64-LABEL: store_atomic_f16__seq_cst:
598-
; PPC64: # %bb.0:
599-
; PPC64-NEXT: mflr r0
600-
; PPC64-NEXT: stdu r1, -128(r1)
601-
; PPC64-NEXT: std r0, 144(r1)
602-
; PPC64-NEXT: .cfi_def_cfa_offset 128
603-
; PPC64-NEXT: .cfi_offset lr, 16
604-
; PPC64-NEXT: .cfi_offset r30, -16
605-
; PPC64-NEXT: std r30, 112(r1) # 8-byte Folded Spill
606-
; PPC64-NEXT: mr r30, r3
607-
; PPC64-NEXT: bl __truncsfhf2
608-
; PPC64-NEXT: nop
609-
; PPC64-NEXT: sync
610-
; PPC64-NEXT: sth r3, 0(r30)
611-
; PPC64-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
612-
; PPC64-NEXT: addi r1, r1, 128
613-
; PPC64-NEXT: ld r0, 16(r1)
614-
; PPC64-NEXT: mtlr r0
615-
; PPC64-NEXT: blr
559+
; CHECK-LABEL: store_atomic_f16__seq_cst:
560+
; CHECK: # %bb.0:
561+
; CHECK-NEXT: sync
562+
; CHECK-NEXT: sth r4, 0(r3)
563+
; CHECK-NEXT: blr
616564
store atomic half %val1, ptr %ptr seq_cst, align 2
617565
ret void
618566
}

llvm/test/CodeGen/PowerPC/f128-conv.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,9 +1349,6 @@ define half @trunc(fp128 %a) nounwind {
13491349
; CHECK-NEXT: std r0, 48(r1)
13501350
; CHECK-NEXT: bl __trunckfhf2
13511351
; CHECK-NEXT: nop
1352-
; CHECK-NEXT: clrlwi r3, r3, 16
1353-
; CHECK-NEXT: mtfprwz f0, r3
1354-
; CHECK-NEXT: xscvhpdp f1, f0
13551352
; CHECK-NEXT: addi r1, r1, 32
13561353
; CHECK-NEXT: ld r0, 16(r1)
13571354
; CHECK-NEXT: mtlr r0
@@ -1364,9 +1361,6 @@ define half @trunc(fp128 %a) nounwind {
13641361
; CHECK-P8-NEXT: std r0, 48(r1)
13651362
; CHECK-P8-NEXT: bl __trunckfhf2
13661363
; CHECK-P8-NEXT: nop
1367-
; CHECK-P8-NEXT: clrldi r3, r3, 48
1368-
; CHECK-P8-NEXT: bl __extendhfsf2
1369-
; CHECK-P8-NEXT: nop
13701364
; CHECK-P8-NEXT: addi r1, r1, 32
13711365
; CHECK-P8-NEXT: ld r0, 16(r1)
13721366
; CHECK-P8-NEXT: mtlr r0
@@ -1379,15 +1373,20 @@ entry:
13791373
define fp128 @ext(half %a) nounwind {
13801374
; CHECK-LABEL: ext:
13811375
; CHECK: # %bb.0: # %entry
1382-
; CHECK-NEXT: xscpsgndp v2, f1, f1
1376+
; CHECK-NEXT: clrlwi r3, r3, 16
1377+
; CHECK-NEXT: mtfprwz f0, r3
1378+
; CHECK-NEXT: xscvhpdp v2, f0
13831379
; CHECK-NEXT: xscvdpqp v2, v2
13841380
; CHECK-NEXT: blr
13851381
;
13861382
; CHECK-P8-LABEL: ext:
13871383
; CHECK-P8: # %bb.0: # %entry
13881384
; CHECK-P8-NEXT: mflr r0
13891385
; CHECK-P8-NEXT: stdu r1, -32(r1)
1386+
; CHECK-P8-NEXT: clrldi r3, r3, 48
13901387
; CHECK-P8-NEXT: std r0, 48(r1)
1388+
; CHECK-P8-NEXT: bl __extendhfsf2
1389+
; CHECK-P8-NEXT: nop
13911390
; CHECK-P8-NEXT: bl __extendsfkf2
13921391
; CHECK-P8-NEXT: nop
13931392
; CHECK-P8-NEXT: addi r1, r1, 32

0 commit comments

Comments
 (0)