Skip to content

Commit 4d387c4

Browse files
authored
[X86] Add custom operation actions for f16: FABS, FNEG, and FCOPYSIGN (#128877)
This pull request adds custom handling for several floating-point operations (FABS, FNEG, and FCOPYSIGN) on the `f16` type, as requested in #126892. Fixes #126892.
1 parent f5ee401 commit 4d387c4

File tree

3 files changed

+39
-61
lines changed

3 files changed

+39
-61
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
701701
setOperationAction(ISD::FSUB, MVT::f16, Promote);
702702
setOperationAction(ISD::FMUL, MVT::f16, Promote);
703703
setOperationAction(ISD::FDIV, MVT::f16, Promote);
704+
setOperationAction(ISD::FABS, MVT::f16, Custom);
705+
setOperationAction(ISD::FNEG, MVT::f16, Custom);
706+
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
704707
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
705708
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
706709
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);

llvm/test/CodeGen/X86/fp16-libcalls.ll

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,10 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind {
5959
define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind {
6060
; F16C-LABEL: test_half_copysign:
6161
; F16C: # %bb.0:
62-
; F16C-NEXT: vpextrw $0, %xmm1, %eax
63-
; F16C-NEXT: andl $32768, %eax # imm = 0x8000
64-
; F16C-NEXT: vpextrw $0, %xmm0, %ecx
65-
; F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
66-
; F16C-NEXT: orl %eax, %ecx
67-
; F16C-NEXT: movw %cx, (%rdi)
62+
; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
63+
; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
64+
; F16C-NEXT: vpor %xmm1, %xmm0, %xmm0
65+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
6866
; F16C-NEXT: retq
6967
;
7068
; FP16-LABEL: test_half_copysign:
@@ -76,23 +74,23 @@ define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind {
7674
;
7775
; X64-LABEL: test_half_copysign:
7876
; X64: # %bb.0:
79-
; X64-NEXT: pextrw $0, %xmm1, %eax
80-
; X64-NEXT: andl $32768, %eax # imm = 0x8000
81-
; X64-NEXT: pextrw $0, %xmm0, %ecx
82-
; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
83-
; X64-NEXT: orl %eax, %ecx
84-
; X64-NEXT: movw %cx, (%rdi)
77+
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
78+
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
79+
; X64-NEXT: por %xmm1, %xmm0
80+
; X64-NEXT: pextrw $0, %xmm0, %eax
81+
; X64-NEXT: movw %ax, (%rdi)
8582
; X64-NEXT: retq
8683
;
8784
; X86-LABEL: test_half_copysign:
8885
; X86: # %bb.0:
86+
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
87+
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
8988
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
90-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
91-
; X86-NEXT: andl $32768, %ecx # imm = 0x8000
92-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
93-
; X86-NEXT: andl $32767, %edx # imm = 0x7FFF
94-
; X86-NEXT: orl %ecx, %edx
95-
; X86-NEXT: movw %dx, (%eax)
89+
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
90+
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
91+
; X86-NEXT: por %xmm1, %xmm0
92+
; X86-NEXT: pextrw $0, %xmm0, %ecx
93+
; X86-NEXT: movw %cx, (%eax)
9694
; X86-NEXT: retl
9795
%res = call half @llvm.copysign.half(half %a0, half %a1)
9896
store half %res, ptr %p0, align 2
@@ -334,9 +332,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
334332
define void @test_half_fabs(half %a0, ptr %p0) nounwind {
335333
; F16C-LABEL: test_half_fabs:
336334
; F16C: # %bb.0:
337-
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
338-
; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
339-
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
335+
; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
340336
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
341337
; F16C-NEXT: retq
342338
;
@@ -349,14 +345,9 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
349345
;
350346
; X64-LABEL: test_half_fabs:
351347
; X64: # %bb.0:
352-
; X64-NEXT: pushq %rbx
353-
; X64-NEXT: movq %rdi, %rbx
354-
; X64-NEXT: callq __extendhfsf2@PLT
355348
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
356-
; X64-NEXT: callq __truncsfhf2@PLT
357349
; X64-NEXT: pextrw $0, %xmm0, %eax
358-
; X64-NEXT: movw %ax, (%rbx)
359-
; X64-NEXT: popq %rbx
350+
; X64-NEXT: movw %ax, (%rdi)
360351
; X64-NEXT: retq
361352
;
362353
; X86-LABEL: test_half_fabs:
@@ -514,9 +505,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
514505
define void @test_half_fneg(half %a0, ptr %p0) nounwind {
515506
; F16C-LABEL: test_half_fneg:
516507
; F16C: # %bb.0:
517-
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
518-
; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
519-
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
508+
; F16C-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
520509
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
521510
; F16C-NEXT: retq
522511
;
@@ -529,14 +518,9 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
529518
;
530519
; X64-LABEL: test_half_fneg:
531520
; X64: # %bb.0:
532-
; X64-NEXT: pushq %rbx
533-
; X64-NEXT: movq %rdi, %rbx
534-
; X64-NEXT: callq __extendhfsf2@PLT
535521
; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
536-
; X64-NEXT: callq __truncsfhf2@PLT
537522
; X64-NEXT: pextrw $0, %xmm0, %eax
538-
; X64-NEXT: movw %ax, (%rbx)
539-
; X64-NEXT: popq %rbx
523+
; X64-NEXT: movw %ax, (%rdi)
540524
; X64-NEXT: retq
541525
;
542526
; X86-LABEL: test_half_fneg:

llvm/test/CodeGen/X86/half.ll

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,6 @@ define void @main.158() #0 {
10411041
; CHECK-LIBCALL: # %bb.0: # %entry
10421042
; CHECK-LIBCALL-NEXT: pushq %rax
10431043
; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
1044-
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
10451044
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
10461045
; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
10471046
; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1
@@ -1059,10 +1058,10 @@ define void @main.158() #0 {
10591058
; BWON-F16C-LABEL: main.158:
10601059
; BWON-F16C: # %bb.0: # %entry
10611060
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
1062-
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
1063-
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
1064-
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1065-
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
1061+
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1062+
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1063+
; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
1064+
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
10661065
; BWON-F16C-NEXT: jae .LBB20_2
10671066
; BWON-F16C-NEXT: # %bb.1: # %entry
10681067
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
@@ -1074,8 +1073,7 @@ define void @main.158() #0 {
10741073
; CHECK-I686-LABEL: main.158:
10751074
; CHECK-I686: # %bb.0: # %entry
10761075
; CHECK-I686-NEXT: subl $12, %esp
1077-
; CHECK-I686-NEXT: movl $0, (%esp)
1078-
; CHECK-I686-NEXT: calll __truncsfhf2
1076+
; CHECK-I686-NEXT: pxor %xmm0, %xmm0
10791077
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
10801078
; CHECK-I686-NEXT: movw %ax, (%esp)
10811079
; CHECK-I686-NEXT: calll __extendhfsf2
@@ -1192,32 +1190,25 @@ entry:
11921190
define half @fcopysign(half %x, half %y) {
11931191
; CHECK-LIBCALL-LABEL: fcopysign:
11941192
; CHECK-LIBCALL: # %bb.0:
1195-
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax
1196-
; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000
1197-
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx
1198-
; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF
1199-
; CHECK-LIBCALL-NEXT: orl %eax, %ecx
1200-
; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0
1193+
; CHECK-LIBCALL-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1194+
; CHECK-LIBCALL-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1195+
; CHECK-LIBCALL-NEXT: orps %xmm1, %xmm0
12011196
; CHECK-LIBCALL-NEXT: retq
12021197
;
12031198
; BWON-F16C-LABEL: fcopysign:
12041199
; BWON-F16C: # %bb.0:
1205-
; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
1206-
; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000
1207-
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx
1208-
; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
1209-
; BWON-F16C-NEXT: orl %eax, %ecx
1210-
; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
1200+
; BWON-F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1201+
; BWON-F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1202+
; BWON-F16C-NEXT: vorps %xmm1, %xmm0, %xmm0
12111203
; BWON-F16C-NEXT: retq
12121204
;
12131205
; CHECK-I686-LABEL: fcopysign:
12141206
; CHECK-I686: # %bb.0:
1215-
; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000
1216-
; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax
1217-
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1218-
; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF
1219-
; CHECK-I686-NEXT: orl %eax, %ecx
1220-
; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0
1207+
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1208+
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
1209+
; CHECK-I686-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1210+
; CHECK-I686-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1211+
; CHECK-I686-NEXT: por %xmm1, %xmm0
12211212
; CHECK-I686-NEXT: retl
12221213
%a = call half @llvm.copysign.f16(half %x, half %y)
12231214
ret half %a

0 commit comments

Comments
 (0)