Skip to content

Commit 1a2a1fb

Browse files
authored
[WebAssembly] Implement prototype f32.load_f16 instruction. (#90906)
Adds a builtin and intrinsic for the f32.load_f16 instruction. The instruction loads an f16 value from memory and puts it in an f32. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md Note: the current spec has f32.load_f16 as opcode 0xFD0120, but this is incorrect and will be changed to 0xFC30 soon.
1 parent cf58c58 commit 1a2a1fb

File tree

10 files changed

+84
-4
lines changed

10 files changed

+84
-4
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,9 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8, "V8sV16ScV16Sc",
190190
TARGET_BUILTIN(__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4, "V4iV16ScV16ScV4i", "nc", "relaxed-simd")
191191
TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f", "nc", "relaxed-simd")
192192

193+
// Half-Precision (fp16)
194+
TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
195+
193196
// Reference Types builtins
194197
// Some builtins are custom type-checked - see 't' as part of the third argument,
195198
// in which case the argument spec (second argument) is unused.

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21303,6 +21303,11 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2130321303
CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
2130421304
return Builder.CreateCall(Callee, {LHS, RHS, Acc});
2130521305
}
21306+
case WebAssembly::BI__builtin_wasm_loadf16_f32: {
21307+
Value *Addr = EmitScalarExpr(E->getArg(0));
21308+
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
21309+
return Builder.CreateCall(Callee, {Addr});
21310+
}
2130621311
case WebAssembly::BI__builtin_wasm_table_get: {
2130721312
assert(E->getArg(0)->getType()->isArrayType());
2130821313
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);

clang/test/CodeGen/builtins-wasm.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
2-
// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
1+
// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -target-feature +half-precision -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
2+
// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -target-feature +half-precision -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
33
// RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +reference-types -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD
44

55
// SIMD convenience types
@@ -802,6 +802,11 @@ f32x4 relaxed_dot_bf16x8_add_f32_f32x4(u16x8 a, u16x8 b, f32x4 c) {
802802
// WEBASSEMBLY-NEXT: ret
803803
}
804804

805+
float load_f16_f32(__fp16 *addr) {
806+
return __builtin_wasm_loadf16_f32(addr);
807+
// WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}})
808+
}
809+
805810
__externref_t externref_null() {
806811
return __builtin_wasm_ref_null_extern();
807812
// WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern()

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,18 @@ def int_wasm_relaxed_dot_bf16x8_add_f32:
321321
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4f32_ty],
322322
[IntrNoMem, IntrSpeculatable]>;
323323

324+
//===----------------------------------------------------------------------===//
325+
// Half-precision intrinsics (experimental)
326+
//===----------------------------------------------------------------------===//
327+
328+
// TODO: Replace these intrinsics with normal ISel patterns once the XXX
329+
// instructions are merged to the proposal.
330+
def int_wasm_loadf16_f32:
331+
Intrinsic<[llvm_float_ty],
332+
[llvm_ptr_ty],
333+
[IntrReadMem, IntrArgMemOnly],
334+
"", [SDNPMemOperand]>;
335+
324336

325337
//===----------------------------------------------------------------------===//
326338
// Thread-local storage intrinsics

llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
206206
WASM_LOAD_STORE(LOAD16_SPLAT)
207207
WASM_LOAD_STORE(LOAD_LANE_I16x8)
208208
WASM_LOAD_STORE(STORE_LANE_I16x8)
209+
WASM_LOAD_STORE(LOAD_F16_F32)
209210
return 1;
210211
WASM_LOAD_STORE(LOAD_I32)
211212
WASM_LOAD_STORE(LOAD_F32)

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
906906
Info.align = Align(8);
907907
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
908908
return true;
909+
case Intrinsic::wasm_loadf16_f32:
910+
Info.opc = ISD::INTRINSIC_W_CHAIN;
911+
Info.memVT = MVT::f16;
912+
Info.ptrVal = I.getArgOperand(0);
913+
Info.offset = 0;
914+
Info.align = Align(2);
915+
Info.flags = MachineMemOperand::MOLoad;
916+
return true;
909917
default:
910918
return false;
911919
}

llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>;
7272
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
7373
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;
7474

75+
// Half Precision
76+
defm LOAD_F16_F32 : WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;
77+
7578
// Pattern matching
7679

7780
multiclass LoadPat<ValueType ty, SDPatternOperator kind, string Name> {
@@ -111,6 +114,8 @@ defm : LoadPat<i64, extloadi8, "LOAD8_U_I64">;
111114
defm : LoadPat<i64, extloadi16, "LOAD16_U_I64">;
112115
defm : LoadPat<i64, extloadi32, "LOAD32_U_I64">;
113116

117+
defm : LoadPat<f32, int_wasm_loadf16_f32, "LOAD_F16_F32">;
118+
114119
// Defines atomic and non-atomic stores, regular and truncating
115120
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode,
116121
list<Predicate> reqs = []> {
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s
2+
; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s
3+
4+
declare float @llvm.wasm.loadf16.f32(ptr)
5+
6+
; CHECK-LABEL: ldf16_32:
7+
; CHECK: f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}}
8+
; CHECK-NEXT: return $pop[[NUM0]]{{$}}
9+
define float @ldf16_32(ptr %p) {
10+
%v = call float @llvm.wasm.loadf16.f32(ptr %p)
11+
ret float %v
12+
}

llvm/test/CodeGen/WebAssembly/offset.ll

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -disable-wasm-fallthrough-return-opt | FileCheck %s
1+
; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -disable-wasm-fallthrough-return-opt -mattr=+half-precision | FileCheck %s
22

33
; Test constant load and store address offsets.
44

@@ -666,3 +666,29 @@ define {i32,i32,i32,i32} @aggregate_return() {
666666
define {i64,i32,i16,i8} @aggregate_return_without_merge() {
667667
ret {i64,i32,i16,i8} zeroinitializer
668668
}
669+
670+
;===----------------------------------------------------------------------------
671+
; Loads: Half Precision
672+
;===----------------------------------------------------------------------------
673+
674+
; Fold an offset into a half-precision load.
675+
676+
; CHECK-LABEL: load_f16_f32_with_folded_offset:
677+
; CHECK: f32.load_f16 $push0=, 24($0){{$}}
678+
define float @load_f16_f32_with_folded_offset(ptr %p) {
679+
%q = ptrtoint ptr %p to i32
680+
%r = add nuw i32 %q, 24
681+
%s = inttoptr i32 %r to ptr
682+
%t = call float @llvm.wasm.loadf16.f32(ptr %s)
683+
ret float %t
684+
}
685+
686+
; Fold a gep offset into a half-precision load.
687+
688+
; CHECK-LABEL: load_f16_f32_with_folded_gep_offset:
689+
; CHECK: f32.load_f16 $push0=, 24($0){{$}}
690+
define float @load_f16_f32_with_folded_gep_offset(ptr %p) {
691+
%s = getelementptr inbounds i8, ptr %p, i32 24
692+
%t = call float @llvm.wasm.loadf16.f32(ptr %s)
693+
ret float %t
694+
}

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd < %s | FileCheck %s
1+
# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd,+half-precision < %s | FileCheck %s
22

33
main:
44
.functype main () -> ()
@@ -839,4 +839,7 @@ main:
839839
# CHECK: i32x4.relaxed_dot_i8x16_i7x16_add_s # encoding: [0xfd,0x93,0x02]
840840
i32x4.relaxed_dot_i8x16_i7x16_add_s
841841

842+
# CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30]
843+
f32.load_f16 48
844+
842845
end_function

0 commit comments

Comments
 (0)